summaryrefslogtreecommitdiffstats
path: root/xlators
diff options
context:
space:
mode:
Diffstat (limited to 'xlators')
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-rpc-ops.c9
1 files changed, 9 insertions, 0 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c
index 73646ec4bfc..890ccf06cdc 100644
--- a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c
@@ -1832,6 +1832,10 @@ glusterd_mgmt_v3_lock_peers (call_frame_t *frame, xlator_t *this,
(xdrproc_t)xdr_gd1_mgmt_v3_lock_req);
out:
gf_msg_debug (this->name, 0, "Returning %d", ret);
+ if (dict)
+ dict_unref (dict);
+ if (req.dict.dict_val)
+ GF_FREE (req.dict.dict_val);
return ret;
}
@@ -1909,6 +1913,11 @@ glusterd_mgmt_v3_unlock_peers (call_frame_t *frame, xlator_t *this,
xdr_gd1_mgmt_v3_unlock_req);
out:
gf_msg_debug (this->name, 0, "Returning %d", ret);
+ if (dict)
+ dict_unref(dict);
+
+ if (req.dict.dict_val)
+ GF_FREE (req.dict.dict_val);
return ret;
}
style='width: 89.5%;'/> -rw-r--r--xlators/cluster/afr/src/afr-dir-write.h33
-rw-r--r--xlators/cluster/afr/src/afr-inode-read.c2800
-rw-r--r--xlators/cluster/afr/src/afr-inode-read.h32
-rw-r--r--xlators/cluster/afr/src/afr-inode-write.c3304
-rw-r--r--xlators/cluster/afr/src/afr-inode-write.h78
-rw-r--r--xlators/cluster/afr/src/afr-lk-common.c2130
-rw-r--r--xlators/cluster/afr/src/afr-mem-types.h53
-rw-r--r--xlators/cluster/afr/src/afr-messages.h282
-rw-r--r--xlators/cluster/afr/src/afr-open.c533
-rw-r--r--xlators/cluster/afr/src/afr-read-txn.c588
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.c3580
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-data.c1407
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-entry.c1832
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-metadata.c820
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-name.c1092
-rw-r--r--xlators/cluster/afr/src/afr-self-heal.h440
-rw-r--r--xlators/cluster/afr/src/afr-self-heald.c2364
-rw-r--r--xlators/cluster/afr/src/afr-self-heald.h82
-rw-r--r--xlators/cluster/afr/src/afr-transaction.c3954
-rw-r--r--xlators/cluster/afr/src/afr-transaction.h61
-rw-r--r--xlators/cluster/afr/src/afr.c1866
-rw-r--r--xlators/cluster/afr/src/afr.h1886
-rw-r--r--xlators/cluster/afr/src/pump.c2477
-rw-r--r--xlators/cluster/afr/src/pump.h81
-rw-r--r--xlators/cluster/dht/src/Makefile.am35
-rw-r--r--xlators/cluster/dht/src/dht-common.c15060
-rw-r--r--xlators/cluster/dht/src/dht-common.h1919
-rw-r--r--xlators/cluster/dht/src/dht-diskusage.c794
-rw-r--r--xlators/cluster/dht/src/dht-hashfn.c151
-rw-r--r--xlators/cluster/dht/src/dht-helper.c3266
-rw-r--r--xlators/cluster/dht/src/dht-inode-read.c2175
-rw-r--r--xlators/cluster/dht/src/dht-inode-write.c1933
-rw-r--r--xlators/cluster/dht/src/dht-layout.c1259
-rw-r--r--xlators/cluster/dht/src/dht-linkfile.c519
-rw-r--r--xlators/cluster/dht/src/dht-lock.c1392
-rw-r--r--xlators/cluster/dht/src/dht-lock.h91
-rw-r--r--xlators/cluster/dht/src/dht-mem-types.h39
-rw-r--r--xlators/cluster/dht/src/dht-messages.h825
-rw-r--r--xlators/cluster/dht/src/dht-rebalance.c5961
-rw-r--r--xlators/cluster/dht/src/dht-rename.c2770
-rw-r--r--xlators/cluster/dht/src/dht-selfheal.c3842
-rw-r--r--xlators/cluster/dht/src/dht-shared.c1624
-rw-r--r--xlators/cluster/dht/src/dht.c165
-rw-r--r--xlators/cluster/dht/src/nufa.c1096
-rw-r--r--xlators/cluster/dht/src/switch.c1477
-rw-r--r--xlators/cluster/dht/src/tier.c1222
-rw-r--r--xlators/cluster/dht/src/tier.h73
-rw-r--r--xlators/cluster/dht/src/unittest/dht_layout_mock.c44
-rw-r--r--xlators/cluster/dht/src/unittest/dht_layout_unittest.c29
-rw-r--r--xlators/cluster/ec/src/Makefile.am39
-rw-r--r--xlators/cluster/ec/src/ec-code-avx.c109
-rw-r--r--xlators/cluster/ec/src/ec-code-avx.h (renamed from xlators/cluster/dht/src/dht-helper.h)15
-rw-r--r--xlators/cluster/ec/src/ec-code-c.c11679
-rw-r--r--xlators/cluster/ec/src/ec-code-c.h27
-rw-r--r--xlators/cluster/ec/src/ec-code-intel.c594
-rw-r--r--xlators/cluster/ec/src/ec-code-intel.h191
-rw-r--r--xlators/cluster/ec/src/ec-code-sse.c101
-rw-r--r--xlators/cluster/ec/src/ec-code-sse.h (renamed from xlators/features/changetimerecorder/src/ctr_mem_types.h)16
-rw-r--r--xlators/cluster/ec/src/ec-code-x64.c144
-rw-r--r--xlators/cluster/ec/src/ec-code-x64.h18
-rw-r--r--xlators/cluster/ec/src/ec-code.c1060
-rw-r--r--xlators/cluster/ec/src/ec-code.h44
-rw-r--r--xlators/cluster/ec/src/ec-combine.c999
-rw-r--r--xlators/cluster/ec/src/ec-combine.h37
-rw-r--r--xlators/cluster/ec/src/ec-common.c3214
-rw-r--r--xlators/cluster/ec/src/ec-common.h297
-rw-r--r--xlators/cluster/ec/src/ec-data.c224
-rw-r--r--xlators/cluster/ec/src/ec-data.h306
-rw-r--r--xlators/cluster/ec/src/ec-dir-read.c616
-rw-r--r--xlators/cluster/ec/src/ec-dir-write.c1328
-rw-r--r--xlators/cluster/ec/src/ec-fops.h419
-rw-r--r--xlators/cluster/ec/src/ec-galois.c183
-rw-r--r--xlators/cluster/ec/src/ec-galois.h32
-rw-r--r--xlators/cluster/ec/src/ec-generic.c1436
-rw-r--r--xlators/cluster/ec/src/ec-gf.c11635
-rw-r--r--xlators/cluster/ec/src/ec-gf8.c5882
-rw-r--r--xlators/cluster/ec/src/ec-gf8.h (renamed from xlators/cluster/ec/src/ec-gf.h)11
-rw-r--r--xlators/cluster/ec/src/ec-heal.c3979
-rw-r--r--xlators/cluster/ec/src/ec-heald.c968
-rw-r--r--xlators/cluster/ec/src/ec-heald.h41
-rw-r--r--xlators/cluster/ec/src/ec-helpers.c680
-rw-r--r--xlators/cluster/ec/src/ec-helpers.h200
-rw-r--r--xlators/cluster/ec/src/ec-inode-read.c1755
-rw-r--r--xlators/cluster/ec/src/ec-inode-write.c2424
-rw-r--r--xlators/cluster/ec/src/ec-locks.c1083
-rw-r--r--xlators/cluster/ec/src/ec-mem-types.h11
-rw-r--r--xlators/cluster/ec/src/ec-messages.h61
-rw-r--r--xlators/cluster/ec/src/ec-method.c488
-rw-r--r--xlators/cluster/ec/src/ec-method.h34
-rw-r--r--xlators/cluster/ec/src/ec-types.h690
-rw-r--r--xlators/cluster/ec/src/ec.c1855
-rw-r--r--xlators/cluster/ec/src/ec.h54
-rw-r--r--xlators/cluster/ha/src/Makefile.am16
-rw-r--r--xlators/cluster/ha/src/ha-helpers.c194
-rw-r--r--xlators/cluster/ha/src/ha-mem-types.h26
-rw-r--r--xlators/cluster/ha/src/ha.c4013
-rw-r--r--xlators/cluster/ha/src/ha.h53
-rw-r--r--xlators/cluster/map/src/Makefile.am16
-rw-r--r--xlators/cluster/map/src/map-helper.c348
-rw-r--r--xlators/cluster/map/src/map-mem-types.h24
-rw-r--r--xlators/cluster/map/src/map.c2566
-rw-r--r--xlators/cluster/map/src/map.h67
-rw-r--r--xlators/cluster/stripe/src/Makefile.am19
-rw-r--r--xlators/cluster/stripe/src/stripe-helpers.c677
-rw-r--r--xlators/cluster/stripe/src/stripe-mem-types.h31
-rw-r--r--xlators/cluster/stripe/src/stripe.c5752
-rw-r--r--xlators/cluster/stripe/src/stripe.h286
-rw-r--r--xlators/debug/Makefile.am2
-rw-r--r--xlators/debug/delay-gen/Makefile.am (renamed from xlators/features/changetimerecorder/Makefile.am)0
-rw-r--r--xlators/debug/delay-gen/src/Makefile.am11
-rw-r--r--xlators/debug/delay-gen/src/delay-gen-mem-types.h21
-rw-r--r--xlators/debug/delay-gen/src/delay-gen-messages.h26
-rw-r--r--xlators/debug/delay-gen/src/delay-gen.c697
-rw-r--r--xlators/debug/delay-gen/src/delay-gen.h27
-rw-r--r--xlators/debug/error-gen/src/Makefile.am5
-rw-r--r--xlators/debug/error-gen/src/error-gen-mem-types.h6
-rw-r--r--xlators/debug/error-gen/src/error-gen.c2997
-rw-r--r--xlators/debug/error-gen/src/error-gen.h31
-rw-r--r--xlators/debug/io-stats/src/Makefile.am7
-rw-r--r--xlators/debug/io-stats/src/io-stats-mem-types.h17
-rw-r--r--xlators/debug/io-stats/src/io-stats.c6165
-rw-r--r--xlators/debug/sink/Makefile.am (renamed from xlators/features/qemu-block/Makefile.am)1
-rw-r--r--xlators/debug/sink/src/Makefile.am14
-rw-r--r--xlators/debug/sink/src/sink.c94
-rw-r--r--xlators/debug/trace/src/Makefile.am5
-rw-r--r--xlators/debug/trace/src/trace-mem-types.h7
-rw-r--r--xlators/debug/trace/src/trace.c5412
-rw-r--r--xlators/debug/trace/src/trace.h70
-rw-r--r--xlators/encryption/Makefile.am3
-rw-r--r--xlators/encryption/crypt/src/Makefile.am24
-rw-r--r--xlators/encryption/crypt/src/atom.c962
-rw-r--r--xlators/encryption/crypt/src/crypt-common.h141
-rw-r--r--xlators/encryption/crypt/src/crypt-mem-types.h45
-rw-r--r--xlators/encryption/crypt/src/crypt.c4526
-rw-r--r--xlators/encryption/crypt/src/crypt.h904
-rw-r--r--xlators/encryption/crypt/src/data.c769
-rw-r--r--xlators/encryption/crypt/src/keys.c302
-rw-r--r--xlators/encryption/crypt/src/metadata.c619
-rw-r--r--xlators/encryption/crypt/src/metadata.h74
-rw-r--r--xlators/encryption/rot-13/src/rot-13.c201
-rw-r--r--xlators/features/Makefile.am15
-rw-r--r--xlators/features/arbiter/src/Makefile.am8
-rw-r--r--xlators/features/arbiter/src/arbiter-mem-types.h7
-rw-r--r--xlators/features/arbiter/src/arbiter.c508
-rw-r--r--xlators/features/arbiter/src/arbiter.h11
-rw-r--r--xlators/features/barrier/src/Makefile.am5
-rw-r--r--xlators/features/barrier/src/barrier-mem-types.h6
-rw-r--r--xlators/features/barrier/src/barrier.c1081
-rw-r--r--xlators/features/barrier/src/barrier.h129
-rw-r--r--xlators/features/bit-rot/src/bitd/Makefile.am21
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-bitd-messages.h101
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c78
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h50
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-scrub.c2220
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-scrub.h34
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-ssm.c124
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-ssm.h38
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot.c2959
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot.h326
-rw-r--r--xlators/features/bit-rot/src/stub/Makefile.am12
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-common.h175
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-object-version.h12
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-stub-helpers.c796
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h28
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h117
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-stub.c4217
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-stub.h581
-rw-r--r--xlators/features/changelog/lib/examples/c/get-changes-multi.c80
-rw-r--r--xlators/features/changelog/lib/examples/c/get-changes.c96
-rw-r--r--xlators/features/changelog/lib/examples/c/get-history.c144
-rwxr-xr-x[-rw-r--r--]xlators/features/changelog/lib/examples/python/changes.py16
-rw-r--r--xlators/features/changelog/lib/examples/python/libgfchangelog.py9
-rw-r--r--xlators/features/changelog/lib/src/Makefile.am34
-rw-r--r--xlators/features/changelog/lib/src/changelog-lib-messages.h74
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-api.c318
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-helpers.c250
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-helpers.h244
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-journal-handler.c1639
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-journal.h94
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-reborp.c626
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-rpc.c93
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-rpc.h10
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog.c869
-rw-r--r--xlators/features/changelog/lib/src/gf-history-changelog.c1467
-rw-r--r--xlators/features/changelog/src/Makefile.am26
-rw-r--r--xlators/features/changelog/src/changelog-barrier.c138
-rw-r--r--xlators/features/changelog/src/changelog-encoders.c279
-rw-r--r--xlators/features/changelog/src/changelog-encoders.h42
-rw-r--r--xlators/features/changelog/src/changelog-ev-handle.c578
-rw-r--r--xlators/features/changelog/src/changelog-ev-handle.h110
-rw-r--r--xlators/features/changelog/src/changelog-helpers.c2606
-rw-r--r--xlators/features/changelog/src/changelog-helpers.h869
-rw-r--r--xlators/features/changelog/src/changelog-mem-types.h33
-rw-r--r--xlators/features/changelog/src/changelog-messages.h172
-rw-r--r--xlators/features/changelog/src/changelog-misc.h163
-rw-r--r--xlators/features/changelog/src/changelog-rpc-common.c499
-rw-r--r--xlators/features/changelog/src/changelog-rpc-common.h59
-rw-r--r--xlators/features/changelog/src/changelog-rpc.c588
-rw-r--r--xlators/features/changelog/src/changelog-rpc.h10
-rw-r--r--xlators/features/changelog/src/changelog-rt.c68
-rw-r--r--xlators/features/changelog/src/changelog-rt.h14
-rw-r--r--xlators/features/changelog/src/changelog.c4410
-rw-r--r--xlators/features/changetimerecorder/src/Makefile.am23
-rw-r--r--xlators/features/changetimerecorder/src/changetimerecorder.c1207
-rw-r--r--xlators/features/changetimerecorder/src/changetimerecorder.h26
-rw-r--r--xlators/features/changetimerecorder/src/ctr-helper.c279
-rw-r--r--xlators/features/changetimerecorder/src/ctr-helper.h478
-rw-r--r--xlators/features/cloudsync/Makefile.am (renamed from xlators/features/ganesha/Makefile.am)0
-rw-r--r--xlators/features/cloudsync/src/Makefile.am46
-rw-r--r--xlators/features/cloudsync/src/cloudsync-autogen-fops-tmpl.c30
-rw-r--r--xlators/features/cloudsync/src/cloudsync-autogen-fops-tmpl.h24
-rw-r--r--xlators/features/cloudsync/src/cloudsync-common.c60
-rw-r--r--xlators/features/cloudsync/src/cloudsync-common.h134
-rwxr-xr-xxlators/features/cloudsync/src/cloudsync-fops-c.py324
-rwxr-xr-xxlators/features/cloudsync/src/cloudsync-fops-h.py31
-rw-r--r--xlators/features/cloudsync/src/cloudsync-mem-types.h22
-rw-r--r--xlators/features/cloudsync/src/cloudsync-messages.h16
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/Makefile.am (renamed from xlators/storage/bd/Makefile.am)0
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/Makefile.am11
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/Makefile.am (renamed from xlators/encryption/rot-13/Makefile.am)2
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/Makefile.am12
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3-mem-types.h19
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.c584
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.h50
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.sym1
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/Makefile.am (renamed from xlators/cluster/map/Makefile.am)2
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/Makefile.am12
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/archivestore.h203
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/cvlt-messages.h30
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcloudsynccvlt.sym1
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt-mem-types.h19
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.c842
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.h84
-rw-r--r--xlators/features/cloudsync/src/cloudsync.c2076
-rw-r--r--xlators/features/cloudsync/src/cloudsync.h123
-rw-r--r--xlators/features/compress/src/Makefile.am8
-rw-r--r--xlators/features/compress/src/cdc-helper.c766
-rw-r--r--xlators/features/compress/src/cdc-mem-types.h10
-rw-r--r--xlators/features/compress/src/cdc.c583
-rw-r--r--xlators/features/compress/src/cdc.h76
-rw-r--r--xlators/features/filter/Makefile.am3
-rw-r--r--xlators/features/filter/src/Makefile.am16
-rw-r--r--xlators/features/filter/src/filter.c1734
-rw-r--r--xlators/features/ganesha/src/Makefile.am18
-rw-r--r--xlators/features/ganesha/src/ganesha-mem-types.h21
-rw-r--r--xlators/features/ganesha/src/ganesha.c101
-rw-r--r--xlators/features/gfid-access/src/Makefile.am5
-rw-r--r--xlators/features/gfid-access/src/gfid-access-mem-types.h9
-rw-r--r--xlators/features/gfid-access/src/gfid-access.c2206
-rw-r--r--xlators/features/gfid-access/src/gfid-access.h137
-rw-r--r--xlators/features/glupy/Makefile.am3
-rw-r--r--xlators/features/glupy/doc/README.md44
-rw-r--r--xlators/features/glupy/doc/TESTING9
-rw-r--r--xlators/features/glupy/doc/test.vol10
-rw-r--r--xlators/features/glupy/examples/Makefile.am5
-rw-r--r--xlators/features/glupy/examples/debug-trace.py775
-rw-r--r--xlators/features/glupy/examples/helloworld.py19
-rw-r--r--xlators/features/glupy/examples/negative.py91
-rw-r--r--xlators/features/glupy/src/Makefile.am21
-rw-r--r--xlators/features/glupy/src/__init__.py.in0
-rw-r--r--xlators/features/glupy/src/glupy.c2501
-rw-r--r--xlators/features/glupy/src/glupy.h60
-rw-r--r--xlators/features/glupy/src/glupy.py852
-rw-r--r--xlators/features/glupy/src/setup.py.in24
-rw-r--r--xlators/features/index/src/Makefile.am10
-rw-r--r--xlators/features/index/src/index-mem-types.h15
-rw-r--r--xlators/features/index/src/index-messages.h33
-rw-r--r--xlators/features/index/src/index.c3427
-rw-r--r--xlators/features/index/src/index.h89
-rw-r--r--xlators/features/leases/Makefile.am (renamed from xlators/encryption/crypt/Makefile.am)2
-rw-r--r--xlators/features/leases/src/Makefile.am20
-rw-r--r--xlators/features/leases/src/leases-internal.c1412
-rw-r--r--xlators/features/leases/src/leases-mem-types.h27
-rw-r--r--xlators/features/leases/src/leases-messages.h33
-rw-r--r--xlators/features/leases/src/leases.c1168
-rw-r--r--xlators/features/leases/src/leases.h259
-rw-r--r--xlators/features/locks/src/Makefile.am14
-rw-r--r--xlators/features/locks/src/clear.c689
-rw-r--r--xlators/features/locks/src/clear.h71
-rw-r--r--xlators/features/locks/src/common.c1953
-rw-r--r--xlators/features/locks/src/common.h258
-rw-r--r--xlators/features/locks/src/entrylk.c1498
-rw-r--r--xlators/features/locks/src/inodelk.c1597
-rw-r--r--xlators/features/locks/src/locks-mem-types.h23
-rw-r--r--xlators/features/locks/src/locks.h332
-rw-r--r--xlators/features/locks/src/pl-messages.h29
-rw-r--r--xlators/features/locks/src/posix.c6334
-rw-r--r--xlators/features/locks/src/reservelk.c587
-rw-r--r--xlators/features/locks/tests/unit-test.c96
-rw-r--r--xlators/features/mac-compat/Makefile.am3
-rw-r--r--xlators/features/mac-compat/src/Makefile.am15
-rw-r--r--xlators/features/mac-compat/src/mac-compat.c349
-rw-r--r--xlators/features/mac-compat/src/mac-compat.h41
-rw-r--r--xlators/features/marker/src/Makefile.am13
-rw-r--r--xlators/features/marker/src/marker-common.c78
-rw-r--r--xlators/features/marker/src/marker-common.h12
-rw-r--r--xlators/features/marker/src/marker-mem-types.h23
-rw-r--r--xlators/features/marker/src/marker-quota-helper.c599
-rw-r--r--xlators/features/marker/src/marker-quota-helper.h76
-rw-r--r--xlators/features/marker/src/marker-quota.c5459
-rw-r--r--xlators/features/marker/src/marker-quota.h206
-rw-r--r--xlators/features/marker/src/marker.c4814
-rw-r--r--xlators/features/marker/src/marker.h217
-rw-r--r--xlators/features/metadisp/Makefile.am (renamed from xlators/cluster/stripe/Makefile.am)2
-rw-r--r--xlators/features/metadisp/src/Makefile.am38
-rw-r--r--xlators/features/metadisp/src/backend.c45
-rw-r--r--xlators/features/metadisp/src/fops-tmpl.c10
-rw-r--r--xlators/features/metadisp/src/gen-fops.py160
-rw-r--r--xlators/features/metadisp/src/metadisp-create.c101
-rw-r--r--xlators/features/metadisp/src/metadisp-fops.h51
-rw-r--r--xlators/features/metadisp/src/metadisp-fsync.c54
-rw-r--r--xlators/features/metadisp/src/metadisp-lookup.c90
-rw-r--r--xlators/features/metadisp/src/metadisp-open.c70
-rw-r--r--xlators/features/metadisp/src/metadisp-readdir.c65
-rw-r--r--xlators/features/metadisp/src/metadisp-setattr.c90
-rw-r--r--xlators/features/metadisp/src/metadisp-stat.c124
-rw-r--r--xlators/features/metadisp/src/metadisp-unlink.c160
-rw-r--r--xlators/features/metadisp/src/metadisp.c46
-rw-r--r--xlators/features/metadisp/src/metadisp.h45
-rw-r--r--xlators/features/namespace/Makefile.am3
-rw-r--r--xlators/features/namespace/src/Makefile.am17
-rw-r--r--xlators/features/namespace/src/namespace.c1344
-rw-r--r--xlators/features/namespace/src/namespace.h23
-rw-r--r--xlators/features/path-convertor/Makefile.am3
-rw-r--r--xlators/features/path-convertor/src/Makefile.am15
-rw-r--r--xlators/features/path-convertor/src/path-mem-types.h22
-rw-r--r--xlators/features/path-convertor/src/path.c1228
-rw-r--r--xlators/features/protect/Makefile.am3
-rw-r--r--xlators/features/protect/src/Makefile.am21
-rw-r--r--xlators/features/protect/src/prot_client.c219
-rw-r--r--xlators/features/protect/src/prot_dht.c168
-rw-r--r--xlators/features/protect/src/prot_server.c51
-rw-r--r--xlators/features/qemu-block/src/Makefile.am156
-rw-r--r--xlators/features/qemu-block/src/bdrv-xlator.c391
-rw-r--r--xlators/features/qemu-block/src/bh-syncop.c48
-rw-r--r--xlators/features/qemu-block/src/clock-timer.c60
-rw-r--r--xlators/features/qemu-block/src/coroutine-synctask.c116
-rw-r--r--xlators/features/qemu-block/src/monitor-logging.c50
-rw-r--r--xlators/features/qemu-block/src/qb-coroutines.c667
-rw-r--r--xlators/features/qemu-block/src/qb-coroutines.h30
-rw-r--r--xlators/features/qemu-block/src/qemu-block-memory-types.h25
-rw-r--r--xlators/features/qemu-block/src/qemu-block.c1139
-rw-r--r--xlators/features/qemu-block/src/qemu-block.h109
-rw-r--r--xlators/features/quiesce/src/Makefile.am7
-rw-r--r--xlators/features/quiesce/src/quiesce-mem-types.h7
-rw-r--r--xlators/features/quiesce/src/quiesce-messages.h28
-rw-r--r--xlators/features/quiesce/src/quiesce.c3370
-rw-r--r--xlators/features/quiesce/src/quiesce.h68
-rw-r--r--xlators/features/quota/src/Makefile.am23
-rw-r--r--xlators/features/quota/src/quota-enforcer-client.c655
-rw-r--r--xlators/features/quota/src/quota-mem-types.h28
-rw-r--r--xlators/features/quota/src/quota-messages.h39
-rw-r--r--xlators/features/quota/src/quota.c8114
-rw-r--r--xlators/features/quota/src/quota.h422
-rw-r--r--xlators/features/quota/src/quotad-aggregator.c740
-rw-r--r--xlators/features/quota/src/quotad-aggregator.h29
-rw-r--r--xlators/features/quota/src/quotad-helpers.c116
-rw-r--r--xlators/features/quota/src/quotad-helpers.h4
-rw-r--r--xlators/features/quota/src/quotad.c313
-rw-r--r--xlators/features/read-only/src/Makefile.am11
-rw-r--r--xlators/features/read-only/src/read-only-common.c552
-rw-r--r--xlators/features/read-only/src/read-only-common.h117
-rw-r--r--xlators/features/read-only/src/read-only-mem-types.h6
-rw-r--r--xlators/features/read-only/src/read-only.c181
-rw-r--r--xlators/features/read-only/src/read-only.h22
-rw-r--r--xlators/features/read-only/src/worm-helper.c395
-rw-r--r--xlators/features/read-only/src/worm-helper.h44
-rw-r--r--xlators/features/read-only/src/worm.c751
-rw-r--r--xlators/features/sdfs/Makefile.am3
-rw-r--r--xlators/features/sdfs/src/Makefile.am19
-rw-r--r--xlators/features/sdfs/src/sdfs-messages.h67
-rw-r--r--xlators/features/sdfs/src/sdfs.c1479
-rw-r--r--xlators/features/sdfs/src/sdfs.h49
-rw-r--r--xlators/features/selinux/Makefile.am3
-rw-r--r--xlators/features/selinux/src/Makefile.am20
-rw-r--r--xlators/features/selinux/src/selinux-mem-types.h (renamed from xlators/features/filter/src/filter-mem-types.h)15
-rw-r--r--xlators/features/selinux/src/selinux-messages.h30
-rw-r--r--xlators/features/selinux/src/selinux.c323
-rw-r--r--xlators/features/selinux/src/selinux.h24
-rw-r--r--xlators/features/shard/src/Makefile.am7
-rw-r--r--xlators/features/shard/src/shard-mem-types.h15
-rw-r--r--xlators/features/shard/src/shard-messages.h39
-rw-r--r--xlators/features/shard/src/shard.c8336
-rw-r--r--xlators/features/shard/src/shard.h468
-rw-r--r--xlators/features/snapview-client/src/Makefile.am7
-rw-r--r--xlators/features/snapview-client/src/snapview-client-mem-types.h12
-rw-r--r--xlators/features/snapview-client/src/snapview-client-messages.h71
-rw-r--r--xlators/features/snapview-client/src/snapview-client.c4074
-rw-r--r--xlators/features/snapview-client/src/snapview-client.h181
-rw-r--r--xlators/features/snapview-server/src/Makefile.am25
-rw-r--r--xlators/features/snapview-server/src/snapview-server-helpers.c905
-rw-r--r--xlators/features/snapview-server/src/snapview-server-mem-types.h15
-rw-r--r--xlators/features/snapview-server/src/snapview-server-messages.h54
-rw-r--r--xlators/features/snapview-server/src/snapview-server-mgmt.c845
-rw-r--r--xlators/features/snapview-server/src/snapview-server.c4095
-rw-r--r--xlators/features/snapview-server/src/snapview-server.h299
-rw-r--r--xlators/features/thin-arbiter/Makefile.am3
-rw-r--r--xlators/features/thin-arbiter/src/Makefile.am22
-rw-r--r--xlators/features/thin-arbiter/src/thin-arbiter-mem-types.h19
-rw-r--r--xlators/features/thin-arbiter/src/thin-arbiter-messages.h28
-rw-r--r--xlators/features/thin-arbiter/src/thin-arbiter.c661
-rw-r--r--xlators/features/thin-arbiter/src/thin-arbiter.h59
-rw-r--r--xlators/features/trash/src/Makefile.am7
-rw-r--r--xlators/features/trash/src/trash-mem-types.h13
-rw-r--r--xlators/features/trash/src/trash.c4295
-rw-r--r--xlators/features/trash/src/trash.h105
-rw-r--r--xlators/features/upcall/src/Makefile.am12
-rw-r--r--xlators/features/upcall/src/upcall-cache-invalidation.h66
-rw-r--r--xlators/features/upcall/src/upcall-internal.c899
-rw-r--r--xlators/features/upcall/src/upcall-mem-types.h13
-rw-r--r--xlators/features/upcall/src/upcall-messages.h57
-rw-r--r--xlators/features/upcall/src/upcall.c2811
-rw-r--r--xlators/features/upcall/src/upcall.h210
-rw-r--r--xlators/features/utime/Makefile.am3
-rw-r--r--xlators/features/utime/src/Makefile.am41
-rw-r--r--xlators/features/utime/src/utime-autogen-fops-tmpl.c28
-rw-r--r--xlators/features/utime/src/utime-autogen-fops-tmpl.h22
-rwxr-xr-xxlators/features/utime/src/utime-gen-fops-c.py147
-rwxr-xr-xxlators/features/utime/src/utime-gen-fops-h.py35
-rw-r--r--xlators/features/utime/src/utime-helpers.c110
-rw-r--r--xlators/features/utime/src/utime-helpers.h25
-rw-r--r--xlators/features/utime/src/utime-mem-types.h21
-rw-r--r--xlators/features/utime/src/utime-messages.h29
-rw-r--r--xlators/features/utime/src/utime.c392
-rw-r--r--xlators/features/utime/src/utime.h23
-rw-r--r--xlators/lib/src/libxlator.c796
-rw-r--r--xlators/lib/src/libxlator.h121
-rw-r--r--xlators/meta/Makefile.am2
-rw-r--r--xlators/meta/src/Makefile.am5
-rw-r--r--xlators/meta/src/active-link.c30
-rw-r--r--xlators/meta/src/cmdline-file.c37
-rw-r--r--xlators/meta/src/frames-file.c169
-rw-r--r--xlators/meta/src/graph-dir.c134
-rw-r--r--xlators/meta/src/graphs-dir.c92
-rw-r--r--xlators/meta/src/history-file.c38
-rw-r--r--xlators/meta/src/logfile-link.c30
-rw-r--r--xlators/meta/src/logging-dir.c47
-rw-r--r--xlators/meta/src/loglevel-file.c45
-rw-r--r--xlators/meta/src/mallinfo-file.c30
-rw-r--r--xlators/meta/src/measure-file.c42
-rw-r--r--xlators/meta/src/meminfo-file.c38
-rw-r--r--xlators/meta/src/meta-defaults.c780
-rw-r--r--xlators/meta/src/meta-helpers.c450
-rw-r--r--xlators/meta/src/meta-hooks.h6
-rw-r--r--xlators/meta/src/meta-mem-types.h17
-rw-r--r--xlators/meta/src/meta.c296
-rw-r--r--xlators/meta/src/meta.h186
-rw-r--r--xlators/meta/src/name-file.c39
-rw-r--r--xlators/meta/src/option-file.c41
-rw-r--r--xlators/meta/src/options-dir.c67
-rw-r--r--xlators/meta/src/private-file.c38
-rw-r--r--xlators/meta/src/process_uuid-file.c33
-rw-r--r--xlators/meta/src/profile-file.c38
-rw-r--r--xlators/meta/src/root-dir.c110
-rw-r--r--xlators/meta/src/subvolume-link.c72
-rw-r--r--xlators/meta/src/subvolumes-dir.c70
-rw-r--r--xlators/meta/src/top-link.c36
-rw-r--r--xlators/meta/src/type-file.c39
-rw-r--r--xlators/meta/src/version-file.c34
-rw-r--r--xlators/meta/src/view-dir.c32
-rw-r--r--xlators/meta/src/volfile-file.c76
-rw-r--r--xlators/meta/src/xlator-dir.c135
-rw-r--r--xlators/mgmt/glusterd/src/Makefile.am64
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-bitd-svc.c241
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-bitd-svc.h22
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-bitrot.c1073
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-brick-ops.c4406
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-conn-helper.c4
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-conn-helper.h7
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c224
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-conn-mgmt.h43
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-errno.h33
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-ganesha.c1271
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-geo-rep.c10573
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-geo-rep.h43
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc-helper.c235
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc-helper.h51
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c478
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.h47
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-handler.c9746
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-handshake.c3964
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-hooks.c974
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-hooks.h85
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-locks.c1234
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-locks.h42
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-log-ops.c445
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mem-types.h101
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-messages.h4053
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c1708
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mgmt.c4591
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mgmt.h82
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mountbroker.c1188
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mountbroker.h35
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-nfs-svc.c384
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-nfs-svc.h21
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-op-sm.c12659
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-op-sm.h338
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-peer-utils.c1513
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-peer-utils.h65
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-pmap.c880
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-pmap.h63
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c180
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-proc-mgmt.h39
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-quota.c3183
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-quota.h (renamed from xlators/features/ganesha/src/ganesha.h)20
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-quotad-svc.c281
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-quotad-svc.h19
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-rcu.h8
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-rebalance.c1990
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-replace-brick.c2631
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-reset-brick.c376
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-rpc-ops.c3944
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-scrub-svc.c224
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-scrub-svc.h28
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-server-quorum.c704
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-server-quorum.h33
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c153
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h42
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-shd-svc.c847
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-shd-svc.h31
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-sm.c2428
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-sm.h257
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapd-svc-helper.c66
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapd-svc-helper.h21
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapd-svc.c714
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapd-svc.h26
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c6617
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapshot-utils.h155
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapshot.c17146
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-statedump.c353
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-statedump.h9
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.c8058
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.h260
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-svc-helper.c1070
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-svc-helper.h58
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c649
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h104
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-syncop.c3210
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-syncop.h97
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-tierd-svc-helper.c207
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.c20608
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.h839
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.c9517
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.h340
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-ops.c5001
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-set.c4713
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.c3491
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.h1729
-rw-r--r--xlators/mount/fuse/src/Makefile.am9
-rw-r--r--xlators/mount/fuse/src/fuse-bridge.c10308
-rw-r--r--xlators/mount/fuse/src/fuse-bridge.h799
-rw-r--r--xlators/mount/fuse/src/fuse-helpers.c1011
-rw-r--r--xlators/mount/fuse/src/fuse-mem-types.h25
-rw-r--r--xlators/mount/fuse/src/fuse-resolve.c1021
-rwxr-xr-xxlators/mount/fuse/utils/mount.glusterfs.in231
-rwxr-xr-xxlators/mount/fuse/utils/mount_glusterfs.in36
-rw-r--r--xlators/nfs/server/src/Makefile.am21
-rw-r--r--xlators/nfs/server/src/acl3.c1553
-rw-r--r--xlators/nfs/server/src/acl3.h18
-rw-r--r--xlators/nfs/server/src/auth-cache.c521
-rw-r--r--xlators/nfs/server/src/auth-cache.h25
-rw-r--r--xlators/nfs/server/src/exports.c1623
-rw-r--r--xlators/nfs/server/src/exports.h69
-rw-r--r--xlators/nfs/server/src/mount3-auth.c681
-rw-r--r--xlators/nfs/server/src/mount3-auth.h28
-rw-r--r--xlators/nfs/server/src/mount3.c6270
-rw-r--r--xlators/nfs/server/src/mount3.h214
-rw-r--r--xlators/nfs/server/src/mount3udp_svc.c336
-rw-r--r--xlators/nfs/server/src/netgroups.c1001
-rw-r--r--xlators/nfs/server/src/netgroups.h31
-rw-r--r--xlators/nfs/server/src/nfs-common.c647
-rw-r--r--xlators/nfs/server/src/nfs-common.h56
-rw-r--r--xlators/nfs/server/src/nfs-fops.c2367
-rw-r--r--xlators/nfs/server/src/nfs-fops.h310
-rw-r--r--xlators/nfs/server/src/nfs-generics.c341
-rw-r--r--xlators/nfs/server/src/nfs-generics.h151
-rw-r--r--xlators/nfs/server/src/nfs-inodes.c771
-rw-r--r--xlators/nfs/server/src/nfs-inodes.h65
-rw-r--r--xlators/nfs/server/src/nfs-mem-types.h70
-rw-r--r--xlators/nfs/server/src/nfs-messages.h1739
-rw-r--r--xlators/nfs/server/src/nfs.c3344
-rw-r--r--xlators/nfs/server/src/nfs.h177
-rw-r--r--xlators/nfs/server/src/nfs3-fh.c223
-rw-r--r--xlators/nfs/server/src/nfs3-fh.h110
-rw-r--r--xlators/nfs/server/src/nfs3-helpers.c4987
-rw-r--r--xlators/nfs/server/src/nfs3-helpers.h280
-rw-r--r--xlators/nfs/server/src/nfs3.c9128
-rw-r--r--xlators/nfs/server/src/nfs3.h387
-rw-r--r--xlators/nfs/server/src/nfsserver.sym12
-rw-r--r--xlators/nfs/server/src/nlm4.c4262
-rw-r--r--xlators/nfs/server/src/nlm4.h131
-rw-r--r--xlators/nfs/server/src/nlmcbk_svc.c168
-rw-r--r--xlators/performance/Makefile.am3
-rw-r--r--xlators/performance/io-cache/src/Makefile.am5
-rw-r--r--xlators/performance/io-cache/src/io-cache-messages.h69
-rw-r--r--xlators/performance/io-cache/src/io-cache.c3080
-rw-r--r--xlators/performance/io-cache/src/io-cache.h406
-rw-r--r--xlators/performance/io-cache/src/ioc-inode.c287
-rw-r--r--xlators/performance/io-cache/src/ioc-mem-types.h24
-rw-r--r--xlators/performance/io-cache/src/page.c1457
-rw-r--r--xlators/performance/io-threads/src/Makefile.am7
-rw-r--r--xlators/performance/io-threads/src/io-threads-messages.h41
-rw-r--r--xlators/performance/io-threads/src/io-threads.c1963
-rw-r--r--xlators/performance/io-threads/src/io-threads.h120
-rw-r--r--xlators/performance/io-threads/src/iot-mem-types.h9
-rw-r--r--xlators/performance/md-cache/src/Makefile.am5
-rw-r--r--xlators/performance/md-cache/src/md-cache-mem-types.h13
-rw-r--r--xlators/performance/md-cache/src/md-cache-messages.h29
-rw-r--r--xlators/performance/md-cache/src/md-cache.c4711
-rw-r--r--xlators/performance/nl-cache/Makefile.am3
-rw-r--r--xlators/performance/nl-cache/src/Makefile.am12
-rw-r--r--xlators/performance/nl-cache/src/nl-cache-helper.c1201
-rw-r--r--xlators/performance/nl-cache/src/nl-cache-mem-types.h27
-rw-r--r--xlators/performance/nl-cache/src/nl-cache-messages.h29
-rw-r--r--xlators/performance/nl-cache/src/nl-cache.c840
-rw-r--r--xlators/performance/nl-cache/src/nl-cache.h175
-rw-r--r--xlators/performance/open-behind/src/Makefile.am7
-rw-r--r--xlators/performance/open-behind/src/open-behind-mem-types.h9
-rw-r--r--xlators/performance/open-behind/src/open-behind-messages.h32
-rw-r--r--xlators/performance/open-behind/src/open-behind.c1615
-rw-r--r--xlators/performance/quick-read/src/Makefile.am7
-rw-r--r--xlators/performance/quick-read/src/quick-read-mem-types.h16
-rw-r--r--xlators/performance/quick-read/src/quick-read-messages.h31
-rw-r--r--xlators/performance/quick-read/src/quick-read.c2133
-rw-r--r--xlators/performance/quick-read/src/quick-read.h88
-rw-r--r--xlators/performance/read-ahead/src/Makefile.am7
-rw-r--r--xlators/performance/read-ahead/src/page.c851
-rw-r--r--xlators/performance/read-ahead/src/read-ahead-mem-types.h17
-rw-r--r--xlators/performance/read-ahead/src/read-ahead-messages.h31
-rw-r--r--xlators/performance/read-ahead/src/read-ahead.c1832
-rw-r--r--xlators/performance/read-ahead/src/read-ahead.h183
-rw-r--r--xlators/performance/readdir-ahead/src/Makefile.am9
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h12
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead-messages.h30
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead.c1581
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead.h94
-rw-r--r--xlators/performance/symlink-cache/Makefile.am3
-rw-r--r--xlators/performance/symlink-cache/src/Makefile.am13
-rw-r--r--xlators/performance/symlink-cache/src/symlink-cache.c399
-rw-r--r--xlators/performance/write-behind/src/Makefile.am7
-rw-r--r--xlators/performance/write-behind/src/write-behind-mem-types.h16
-rw-r--r--xlators/performance/write-behind/src/write-behind-messages.h31
-rw-r--r--xlators/performance/write-behind/src/write-behind.c4200
-rw-r--r--xlators/playground/rot-13/Makefile.am (renamed from xlators/cluster/ha/Makefile.am)0
-rw-r--r--xlators/playground/rot-13/src/Makefile.am (renamed from xlators/encryption/rot-13/src/Makefile.am)5
-rw-r--r--xlators/playground/rot-13/src/rot-13.c166
-rw-r--r--xlators/playground/rot-13/src/rot-13.h (renamed from xlators/encryption/rot-13/src/rot-13.h)9
-rw-r--r--xlators/playground/template/src/Makefile.am7
-rw-r--r--xlators/playground/template/src/template.c187
-rw-r--r--xlators/playground/template/src/template.h39
-rw-r--r--xlators/protocol/auth/addr/src/Makefile.am6
-rw-r--r--xlators/protocol/auth/addr/src/addr.c475
-rw-r--r--xlators/protocol/auth/login/src/Makefile.am5
-rw-r--r--xlators/protocol/auth/login/src/login.c323
-rw-r--r--xlators/protocol/client/src/Makefile.am13
-rw-r--r--xlators/protocol/client/src/client-callback.c326
-rw-r--r--xlators/protocol/client/src/client-common.c3589
-rw-r--r--xlators/protocol/client/src/client-common.h630
-rw-r--r--xlators/protocol/client/src/client-handshake.c2610
-rw-r--r--xlators/protocol/client/src/client-helpers.c1031
-rw-r--r--xlators/protocol/client/src/client-lk.c748
-rw-r--r--xlators/protocol/client/src/client-mem-types.h17
-rw-r--r--xlators/protocol/client/src/client-messages.h174
-rw-r--r--xlators/protocol/client/src/client-rpc-fops.c10291
-rw-r--r--xlators/protocol/client/src/client-rpc-fops_v2.c6177
-rw-r--r--xlators/protocol/client/src/client.c4350
-rw-r--r--xlators/protocol/client/src/client.h570
-rw-r--r--xlators/protocol/server/src/Makefile.am21
-rw-r--r--xlators/protocol/server/src/authenticate.c355
-rw-r--r--xlators/protocol/server/src/authenticate.h39
-rw-r--r--xlators/protocol/server/src/server-common.c842
-rw-r--r--xlators/protocol/server/src/server-common.h199
-rw-r--r--xlators/protocol/server/src/server-handshake.c1416
-rw-r--r--xlators/protocol/server/src/server-helpers.c2144
-rw-r--r--xlators/protocol/server/src/server-helpers.h93
-rw-r--r--xlators/protocol/server/src/server-mem-types.h23
-rw-r--r--xlators/protocol/server/src/server-messages.h168
-rw-r--r--xlators/protocol/server/src/server-resolve.c921
-rw-r--r--xlators/protocol/server/src/server-rpc-fops.c9112
-rw-r--r--xlators/protocol/server/src/server-rpc-fops_v2.c6031
-rw-r--r--xlators/protocol/server/src/server.c2755
-rw-r--r--xlators/protocol/server/src/server.h273
-rw-r--r--xlators/storage/Makefile.am4
-rw-r--r--xlators/storage/bd/src/Makefile.am20
-rw-r--r--xlators/storage/bd/src/bd-aio.c528
-rw-r--r--xlators/storage/bd/src/bd-aio.h41
-rw-r--r--xlators/storage/bd/src/bd-helper.c1023
-rw-r--r--xlators/storage/bd/src/bd-mem-types.h27
-rw-r--r--xlators/storage/bd/src/bd.c2452
-rw-r--r--xlators/storage/bd/src/bd.h173
-rw-r--r--xlators/storage/posix/src/Makefile.am21
-rw-r--r--xlators/storage/posix/src/posix-aio.c957
-rw-r--r--xlators/storage/posix/src/posix-aio.h27
-rw-r--r--xlators/storage/posix/src/posix-common.c1524
-rw-r--r--xlators/storage/posix/src/posix-entry-ops.c2496
-rw-r--r--xlators/storage/posix/src/posix-gfid-path.c243
-rw-r--r--xlators/storage/posix/src/posix-gfid-path.h28
-rw-r--r--xlators/storage/posix/src/posix-handle.c1552
-rw-r--r--xlators/storage/posix/src/posix-handle.h425
-rw-r--r--xlators/storage/posix/src/posix-helpers.c4767
-rw-r--r--xlators/storage/posix/src/posix-inode-fd-ops.c6004
-rw-r--r--xlators/storage/posix/src/posix-inode-handle.h118
-rw-r--r--xlators/storage/posix/src/posix-mem-types.h20
-rw-r--r--xlators/storage/posix/src/posix-messages.h74
-rw-r--r--xlators/storage/posix/src/posix-metadata-disk.h31
-rw-r--r--xlators/storage/posix/src/posix-metadata.c916
-rw-r--r--xlators/storage/posix/src/posix-metadata.h71
-rw-r--r--xlators/storage/posix/src/posix.c6462
-rw-r--r--xlators/storage/posix/src/posix.h741
-rw-r--r--xlators/system/posix-acl/src/Makefile.am8
-rw-r--r--xlators/system/posix-acl/src/posix-acl-mem-types.h13
-rw-r--r--xlators/system/posix-acl/src/posix-acl-messages.h28
-rw-r--r--xlators/system/posix-acl/src/posix-acl-xattr.c229
-rw-r--r--xlators/system/posix-acl/src/posix-acl-xattr.h17
-rw-r--r--xlators/system/posix-acl/src/posix-acl.c3241
-rw-r--r--xlators/system/posix-acl/src/posix-acl.h33
-rw-r--r--xlators/xlator.sym1
724 files changed, 372333 insertions, 291026 deletions
diff --git a/xlators/Makefile.am b/xlators/Makefile.am
index 2a03f905b6d..ef20cbb64fa 100644
--- a/xlators/Makefile.am
+++ b/xlators/Makefile.am
@@ -1,4 +1,13 @@
-SUBDIRS = cluster storage protocol performance debug features encryption mount nfs mgmt system \
- playground meta
+if BUILD_GNFS
+ GNFS_DIR = nfs
+endif
+
+DIST_SUBDIRS = cluster storage protocol performance debug features \
+ mount nfs mgmt system playground meta
+
+SUBDIRS = cluster storage protocol performance debug features \
+ mount ${GNFS_DIR} mgmt system playground meta
+
+EXTRA_DIST = xlator.sym
CLEANFILES =
diff --git a/xlators/cluster/Makefile.am b/xlators/cluster/Makefile.am
index 903fbb39f12..8e067d5ab58 100644
--- a/xlators/cluster/Makefile.am
+++ b/xlators/cluster/Makefile.am
@@ -1,3 +1,3 @@
-SUBDIRS = stripe afr dht ec
+SUBDIRS = afr dht ec
CLEANFILES =
diff --git a/xlators/cluster/afr/src/Makefile.am b/xlators/cluster/afr/src/Makefile.am
index ac66bf3bb11..610819b28fc 100644
--- a/xlators/cluster/afr/src/Makefile.am
+++ b/xlators/cluster/afr/src/Makefile.am
@@ -1,4 +1,4 @@
-xlator_LTLIBRARIES = afr.la pump.la
+xlator_LTLIBRARIES = afr.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster
afr_common_source = afr-dir-read.c afr-dir-write.c afr-inode-read.c \
@@ -10,22 +10,19 @@ AFR_SELFHEAL_SOURCES = afr-self-heal-common.c afr-self-heal-data.c \
afr-self-heal-entry.c afr-self-heal-metadata.c afr-self-heald.c \
afr-self-heal-name.c
-afr_la_LDFLAGS = -module -avoid-version
+afr_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
afr_la_SOURCES = $(afr_common_source) $(AFR_SELFHEAL_SOURCES) afr.c
afr_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-pump_la_LDFLAGS = -module -avoid-version
-pump_la_SOURCES = $(afr_common_source) $(AFR_SELFHEAL_SOURCES) pump.c
-pump_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-
noinst_HEADERS = afr.h afr-transaction.h afr-inode-write.h afr-inode-read.h \
afr-dir-read.h afr-dir-write.h afr-self-heal.h afr-mem-types.h \
- afr-common.c afr-self-heald.h pump.h \
+ afr-common.c afr-self-heald.h \
$(top_builddir)/xlators/lib/src/libxlator.h afr-messages.h
AM_CPPFLAGS = $(GF_CPPFLAGS) \
-I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/xlators/lib/src \
- -I$(top_srcdir)/rpc/rpc-lib/src
+ -I$(top_srcdir)/rpc/rpc-lib/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src
AM_CFLAGS = -Wall $(GF_CFLAGS)
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 5654e3ad03d..032ab5c8001 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -15,29 +15,20 @@
#include <stdlib.h>
#include <signal.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
+#include <glusterfs/glusterfs.h>
#include "afr.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "list.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
-#include "byte-order.h"
-#include "statedump.h"
-#include "inode.h"
-
-#include "fd.h"
+#include <glusterfs/dict.h>
+#include <glusterfs/hashfn.h>
+#include <glusterfs/list.h>
+#include <glusterfs/call-stub.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/statedump.h>
+#include <glusterfs/events.h>
+#include <glusterfs/upcall-utils.h>
#include "afr-inode-read.h"
#include "afr-inode-write.h"
@@ -48,56 +39,799 @@
#include "afr-self-heald.h"
#include "afr-messages.h"
-call_frame_t *
-afr_copy_frame (call_frame_t *base)
+int32_t
+afr_quorum_errno(afr_private_t *priv)
+{
+ return ENOTCONN;
+}
+
+gf_boolean_t
+afr_is_private_directory(afr_private_t *priv, uuid_t pargfid, const char *name,
+ pid_t pid)
{
- afr_local_t *local = NULL;
- call_frame_t *frame = NULL;
- int op_errno = 0;
+ if (!__is_root_gfid(pargfid)) {
+ return _gf_false;
+ }
+
+ if (strcmp(name, GF_REPLICATE_TRASH_DIR) == 0) {
+ /*For backward compatibility /.landfill is private*/
+ return _gf_true;
+ }
+
+ if (pid == GF_CLIENT_PID_GSYNCD) {
+ /*geo-rep needs to create/sync private directory on slave because
+ * it appears in changelog*/
+ return _gf_false;
+ }
- frame = copy_frame (base);
- if (!frame)
- return NULL;
- local = AFR_FRAME_INIT (frame, op_errno);
- if (!local) {
- AFR_STACK_DESTROY (frame);
- return NULL;
- }
+ if (pid == GF_CLIENT_PID_GLFS_HEAL || pid == GF_CLIENT_PID_SELF_HEALD) {
+ if (strcmp(name, priv->anon_inode_name) == 0) {
+ /* anonymous-inode dir is private*/
+ return _gf_true;
+ }
+ } else {
+ if (strncmp(name, AFR_ANON_DIR_PREFIX, strlen(AFR_ANON_DIR_PREFIX)) ==
+ 0) {
+ /* anonymous-inode dir prefix is private for geo-rep to work*/
+ return _gf_true;
+ }
+ }
- return frame;
+ return _gf_false;
}
-int
-__afr_inode_ctx_get (xlator_t *this, inode_t *inode, afr_inode_ctx_t **ctx)
+void
+afr_fill_success_replies(afr_local_t *local, afr_private_t *priv,
+ unsigned char *replies)
{
- uint64_t ctx_int = 0;
- int ret = -1;
- afr_inode_ctx_t *tmp_ctx = NULL;
+ int i = 0;
- ret = __inode_ctx_get (inode, this, &ctx_int);
- if (ret) {
- tmp_ctx = GF_CALLOC (1, sizeof (afr_inode_ctx_t),
- gf_afr_mt_inode_ctx_t);
- if (!tmp_ctx)
- goto out;
-
- ctx_int = (long) tmp_ctx;
- ret = __inode_ctx_set (inode, this, &ctx_int);
- if (ret) {
- GF_FREE (tmp_ctx);
- goto out;
- }
- tmp_ctx->spb_choice = -1;
- tmp_ctx->read_subvol = 0;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].valid && local->replies[i].op_ret == 0) {
+ replies[i] = 1;
} else {
- tmp_ctx = (afr_inode_ctx_t *) ctx_int;
+ replies[i] = 0;
}
+ }
+}
- *ctx = tmp_ctx;
- ret = 0;
+int
+afr_fav_child_reset_sink_xattrs(void *opaque);
+
+int
+afr_fav_child_reset_sink_xattrs_cbk(int ret, call_frame_t *frame, void *opaque);
+
+static void
+afr_discover_done(call_frame_t *frame, xlator_t *this);
+
+int
+afr_dom_lock_acquire_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
+{
+ afr_local_t *local = frame->local;
+ afr_private_t *priv = this->private;
+ int i = (long)cookie;
+
+ local->cont.lk.dom_lock_op_ret[i] = op_ret;
+ local->cont.lk.dom_lock_op_errno[i] = op_errno;
+ if (op_ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, AFR_MSG_LK_HEAL_DOM,
+ "%s: Failed to acquire %s on %s",
+ uuid_utoa(local->fd->inode->gfid), AFR_LK_HEAL_DOM,
+ priv->children[i]->name);
+ } else {
+ local->cont.lk.dom_locked_nodes[i] = 1;
+ }
+
+ syncbarrier_wake(&local->barrier);
+
+ return 0;
+}
+
+int
+afr_dom_lock_acquire(call_frame_t *frame)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ struct gf_flock flock = {
+ 0,
+ };
+ int i = 0;
+
+ priv = frame->this->private;
+ local = frame->local;
+ local->cont.lk.dom_locked_nodes = GF_CALLOC(
+ priv->child_count, sizeof(*local->cont.lk.locked_nodes),
+ gf_afr_mt_char);
+ if (!local->cont.lk.dom_locked_nodes) {
+ return -ENOMEM;
+ }
+ local->cont.lk.dom_lock_op_ret = GF_CALLOC(
+ priv->child_count, sizeof(*local->cont.lk.dom_lock_op_ret),
+ gf_afr_mt_int32_t);
+ if (!local->cont.lk.dom_lock_op_ret) {
+ return -ENOMEM; /* CALLOC'd members are freed in afr_local_cleanup. */
+ }
+ local->cont.lk.dom_lock_op_errno = GF_CALLOC(
+ priv->child_count, sizeof(*local->cont.lk.dom_lock_op_errno),
+ gf_afr_mt_int32_t);
+ if (!local->cont.lk.dom_lock_op_errno) {
+ return -ENOMEM; /* CALLOC'd members are freed in afr_local_cleanup. */
+ }
+ flock.l_type = F_WRLCK;
+
+ AFR_ONALL(frame, afr_dom_lock_acquire_cbk, finodelk, AFR_LK_HEAL_DOM,
+ local->fd, F_SETLK, &flock, NULL);
+
+ if (!afr_has_quorum(local->cont.lk.dom_locked_nodes, frame->this, NULL))
+ goto blocking_lock;
+
+ /*If any of the bricks returned EAGAIN, we still need blocking locks.*/
+ if (AFR_COUNT(local->cont.lk.dom_locked_nodes, priv->child_count) !=
+ priv->child_count) {
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->cont.lk.dom_lock_op_ret[i] == -1 &&
+ local->cont.lk.dom_lock_op_errno[i] == EAGAIN)
+ goto blocking_lock;
+ }
+ }
+
+ return 0;
+
+blocking_lock:
+ afr_dom_lock_release(frame);
+ AFR_ONALL(frame, afr_dom_lock_acquire_cbk, finodelk, AFR_LK_HEAL_DOM,
+ local->fd, F_SETLKW, &flock, NULL);
+ if (!afr_has_quorum(local->cont.lk.dom_locked_nodes, frame->this, NULL)) {
+ afr_dom_lock_release(frame);
+ return -afr_quorum_errno(priv);
+ }
+
+ return 0;
+}
+
+int
+afr_dom_lock_release_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
+{
+ afr_local_t *local = frame->local;
+ afr_private_t *priv = this->private;
+ int i = (long)cookie;
+
+ if (op_ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, AFR_MSG_LK_HEAL_DOM,
+ "%s: Failed to release %s on %s", local->loc.path,
+ AFR_LK_HEAL_DOM, priv->children[i]->name);
+ }
+ local->cont.lk.dom_locked_nodes[i] = 0;
+
+ syncbarrier_wake(&local->barrier);
+
+ return 0;
+}
+
+void
+afr_dom_lock_release(call_frame_t *frame)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ unsigned char *locked_on = NULL;
+ struct gf_flock flock = {
+ 0,
+ };
+
+ local = frame->local;
+ priv = frame->this->private;
+ locked_on = local->cont.lk.dom_locked_nodes;
+ if (AFR_COUNT(locked_on, priv->child_count) == 0)
+ return;
+ flock.l_type = F_UNLCK;
+
+ AFR_ONLIST(locked_on, frame, afr_dom_lock_release_cbk, finodelk,
+ AFR_LK_HEAL_DOM, local->fd, F_SETLK, &flock, NULL);
+
+ return;
+}
+
+static void
+afr_lk_heal_info_cleanup(afr_lk_heal_info_t *info)
+{
+ if (!info)
+ return;
+ if (info->xdata_req)
+ dict_unref(info->xdata_req);
+ if (info->fd)
+ fd_unref(info->fd);
+ GF_FREE(info->locked_nodes);
+ GF_FREE(info->child_up_event_gen);
+ GF_FREE(info->child_down_event_gen);
+ GF_FREE(info);
+}
+
+static int
+afr_add_lock_to_saved_locks(call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t *priv = this->private;
+ afr_local_t *local = frame->local;
+ afr_lk_heal_info_t *info = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int ret = -ENOMEM;
+
+ info = GF_CALLOC(sizeof(*info), 1, gf_afr_mt_lk_heal_info_t);
+ if (!info) {
+ goto cleanup;
+ }
+ INIT_LIST_HEAD(&info->pos);
+ info->fd = fd_ref(local->fd);
+ info->cmd = local->cont.lk.cmd;
+ info->pid = frame->root->pid;
+ info->flock = local->cont.lk.user_flock;
+ info->xdata_req = dict_copy_with_ref(local->xdata_req, NULL);
+ if (!info->xdata_req) {
+ goto cleanup;
+ }
+ info->lk_owner = frame->root->lk_owner;
+ info->locked_nodes = GF_MALLOC(
+ sizeof(*info->locked_nodes) * priv->child_count, gf_afr_mt_char);
+ if (!info->locked_nodes) {
+ goto cleanup;
+ }
+ memcpy(info->locked_nodes, local->cont.lk.locked_nodes,
+ sizeof(*info->locked_nodes) * priv->child_count);
+ info->child_up_event_gen = GF_CALLOC(sizeof(*info->child_up_event_gen),
+ priv->child_count, gf_afr_mt_int32_t);
+ if (!info->child_up_event_gen) {
+ goto cleanup;
+ }
+ info->child_down_event_gen = GF_CALLOC(sizeof(*info->child_down_event_gen),
+ priv->child_count,
+ gf_afr_mt_int32_t);
+ if (!info->child_down_event_gen) {
+ goto cleanup;
+ }
+
+ LOCK(&local->fd->lock);
+ {
+ fd_ctx = __afr_fd_ctx_get(local->fd, this);
+ if (fd_ctx)
+ fd_ctx->lk_heal_info = info;
+ }
+ UNLOCK(&local->fd->lock);
+ if (!fd_ctx) {
+ goto cleanup;
+ }
+
+ LOCK(&priv->lock);
+ {
+ list_add_tail(&info->pos, &priv->saved_locks);
+ }
+ UNLOCK(&priv->lock);
+
+ return 0;
+cleanup:
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_LK_HEAL_DOM,
+ "%s: Failed to add lock to healq",
+ uuid_utoa(local->fd->inode->gfid));
+ if (info) {
+ afr_lk_heal_info_cleanup(info);
+ if (fd_ctx) {
+ LOCK(&local->fd->lock);
+ {
+ fd_ctx->lk_heal_info = NULL;
+ }
+ UNLOCK(&local->fd->lock);
+ }
+ }
+ return ret;
+}
+
+static int
+afr_remove_lock_from_saved_locks(afr_local_t *local, xlator_t *this)
+{
+ afr_private_t *priv = this->private;
+ struct gf_flock flock = local->cont.lk.user_flock;
+ afr_lk_heal_info_t *info = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int ret = -EINVAL;
+
+ fd_ctx = afr_fd_ctx_get(local->fd, this);
+ if (!fd_ctx || !fd_ctx->lk_heal_info) {
+ goto out;
+ }
+
+ info = fd_ctx->lk_heal_info;
+ if ((info->flock.l_start != flock.l_start) ||
+ (info->flock.l_whence != flock.l_whence) ||
+ (info->flock.l_len != flock.l_len)) {
+ /*TODO: Compare lkowners too.*/
+ goto out;
+ }
+
+ LOCK(&priv->lock);
+ {
+ list_del(&fd_ctx->lk_heal_info->pos);
+ }
+ UNLOCK(&priv->lock);
+
+ afr_lk_heal_info_cleanup(info);
+ fd_ctx->lk_heal_info = NULL;
+ ret = 0;
out:
- return ret;
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_LK_HEAL_DOM,
+ "%s: Failed to remove lock from healq",
+ uuid_utoa(local->fd->inode->gfid));
+ return ret;
+}
+
+int
+afr_lock_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata)
+{
+ afr_local_t *local = frame->local;
+ int i = (long)cookie;
+
+ local->replies[i].valid = 1;
+ local->replies[i].op_ret = op_ret;
+ local->replies[i].op_errno = op_errno;
+ if (op_ret != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, AFR_MSG_LK_HEAL_DOM,
+ "Failed to heal lock on child %d for %s", i,
+ uuid_utoa(local->fd->inode->gfid));
+ }
+ syncbarrier_wake(&local->barrier);
+ return 0;
+}
+
+int
+afr_getlk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
+{
+ afr_local_t *local = frame->local;
+ int i = (long)cookie;
+
+ local->replies[i].valid = 1;
+ local->replies[i].op_ret = op_ret;
+ local->replies[i].op_errno = op_errno;
+ if (op_ret != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, AFR_MSG_LK_HEAL_DOM,
+ "Failed getlk for %s", uuid_utoa(local->fd->inode->gfid));
+ } else {
+ local->cont.lk.getlk_rsp[i] = *lock;
+ }
+
+ syncbarrier_wake(&local->barrier);
+ return 0;
+}
+
+static gf_boolean_t
+afr_does_lk_owner_match(call_frame_t *frame, afr_private_t *priv,
+ afr_lk_heal_info_t *info)
+{
+ int i = 0;
+ afr_local_t *local = frame->local;
+ struct gf_flock flock = {
+ 0,
+ };
+ gf_boolean_t ret = _gf_true;
+ char *wind_on = alloca0(priv->child_count);
+ unsigned char *success_replies = alloca0(priv->child_count);
+ local->cont.lk.getlk_rsp = GF_CALLOC(sizeof(*local->cont.lk.getlk_rsp),
+ priv->child_count, gf_afr_mt_gf_lock);
+
+ flock = info->flock;
+ for (i = 0; i < priv->child_count; i++) {
+ if (info->locked_nodes[i])
+ wind_on[i] = 1;
+ }
+
+ AFR_ONLIST(wind_on, frame, afr_getlk_cbk, lk, info->fd, F_GETLK, &flock,
+ info->xdata_req);
+
+ afr_fill_success_replies(local, priv, success_replies);
+ if (AFR_COUNT(success_replies, priv->child_count) == 0) {
+ ret = _gf_false;
+ goto out;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid || local->replies[i].op_ret != 0)
+ continue;
+ if (local->cont.lk.getlk_rsp[i].l_type == F_UNLCK)
+ continue;
+ /*TODO: Do we really need to compare lkowner if F_UNLCK is true?*/
+ if (!is_same_lkowner(&local->cont.lk.getlk_rsp[i].l_owner,
+ &info->lk_owner)) {
+ ret = _gf_false;
+ break;
+ }
+ }
+out:
+ afr_local_replies_wipe(local, priv);
+ GF_FREE(local->cont.lk.getlk_rsp);
+ local->cont.lk.getlk_rsp = NULL;
+ return ret;
+}
+
+static void
+afr_mark_fd_bad(fd_t *fd, xlator_t *this)
+{
+ afr_fd_ctx_t *fd_ctx = NULL;
+
+ if (!fd)
+ return;
+ LOCK(&fd->lock);
+ {
+ fd_ctx = __afr_fd_ctx_get(fd, this);
+ if (fd_ctx) {
+ fd_ctx->is_fd_bad = _gf_true;
+ fd_ctx->lk_heal_info = NULL;
+ }
+ }
+ UNLOCK(&fd->lock);
+}
+
+static void
+afr_add_lock_to_lkhealq(afr_private_t *priv, afr_lk_heal_info_t *info)
+{
+ LOCK(&priv->lock);
+ {
+ list_del(&info->pos);
+ list_add_tail(&info->pos, &priv->lk_healq);
+ }
+ UNLOCK(&priv->lock);
}
+
+static void
+afr_lock_heal_do(call_frame_t *frame, afr_private_t *priv,
+ afr_lk_heal_info_t *info)
+{
+ int i = 0;
+ int op_errno = 0;
+ int32_t *current_event_gen = NULL;
+ afr_local_t *local = frame->local;
+ xlator_t *this = frame->this;
+ char *wind_on = alloca0(priv->child_count);
+ gf_boolean_t retry = _gf_true;
+
+ frame->root->pid = info->pid;
+ lk_owner_copy(&frame->root->lk_owner, &info->lk_owner);
+
+ op_errno = -afr_dom_lock_acquire(frame);
+ if ((op_errno != 0)) {
+ goto release;
+ }
+
+ if (!afr_does_lk_owner_match(frame, priv, info)) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_LK_HEAL_DOM,
+ "Ignoring lock heal for %s since lk-onwers mismatch. "
+ "Lock possibly pre-empted by another client.",
+ uuid_utoa(info->fd->inode->gfid));
+ goto release;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (info->locked_nodes[i])
+ continue;
+ wind_on[i] = 1;
+ }
+
+ current_event_gen = alloca(priv->child_count);
+ memcpy(current_event_gen, info->child_up_event_gen,
+ priv->child_count * sizeof *current_event_gen);
+ AFR_ONLIST(wind_on, frame, afr_lock_heal_cbk, lk, info->fd, info->cmd,
+ &info->flock, info->xdata_req);
+
+ LOCK(&priv->lock);
+ {
+ for (i = 0; i < priv->child_count; i++) {
+ if (!wind_on[i])
+ continue;
+ if ((!local->replies[i].valid) || (local->replies[i].op_ret != 0)) {
+ continue;
+ }
+
+ if ((current_event_gen[i] == info->child_up_event_gen[i]) &&
+ (current_event_gen[i] > info->child_down_event_gen[i])) {
+ info->locked_nodes[i] = 1;
+ retry = _gf_false;
+ list_del_init(&info->pos);
+ list_add_tail(&info->pos, &priv->saved_locks);
+ } else {
+ /*We received subsequent child up/down events while heal was in
+ * progress; don't mark child as healed. Attempt again on the
+ * new child up*/
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_LK_HEAL_DOM,
+ "Event gen mismatch: skipped healing lock on child %d "
+ "for %s.",
+ i, uuid_utoa(info->fd->inode->gfid));
+ }
+ }
+ }
+ UNLOCK(&priv->lock);
+
+release:
+ afr_dom_lock_release(frame);
+ if (retry)
+ afr_add_lock_to_lkhealq(priv, info);
+ return;
+}
+
+static int
+afr_lock_heal_done(int ret, call_frame_t *frame, void *opaque)
+{
+ STACK_DESTROY(frame->root);
+ return 0;
+}
+
+static int
+afr_lock_heal(void *opaque)
+{
+ call_frame_t *frame = (call_frame_t *)opaque;
+ call_frame_t *iter_frame = NULL;
+ xlator_t *this = frame->this;
+ afr_private_t *priv = this->private;
+ afr_lk_heal_info_t *info = NULL;
+ afr_lk_heal_info_t *tmp = NULL;
+ struct list_head healq = {
+ 0,
+ };
+ int ret = 0;
+
+ iter_frame = afr_copy_frame(frame);
+ if (!iter_frame) {
+ return ENOMEM;
+ }
+
+ INIT_LIST_HEAD(&healq);
+ LOCK(&priv->lock);
+ {
+ list_splice_init(&priv->lk_healq, &healq);
+ }
+ UNLOCK(&priv->lock);
+
+ list_for_each_entry_safe(info, tmp, &healq, pos)
+ {
+ GF_ASSERT((AFR_COUNT(info->locked_nodes, priv->child_count) <
+ priv->child_count));
+ ((afr_local_t *)(iter_frame->local))->fd = fd_ref(info->fd);
+ afr_lock_heal_do(iter_frame, priv, info);
+ AFR_STACK_RESET(iter_frame);
+ if (iter_frame->local == NULL) {
+ ret = ENOTCONN;
+ gf_msg(frame->this->name, GF_LOG_ERROR, ENOTCONN,
+ AFR_MSG_LK_HEAL_DOM,
+ "Aborting processing of lk_healq."
+ "Healing will be reattempted on next child up for locks "
+ "that are still in quorum.");
+ LOCK(&priv->lock);
+ {
+ list_add_tail(&healq, &priv->lk_healq);
+ }
+ UNLOCK(&priv->lock);
+ break;
+ }
+ }
+
+ AFR_STACK_DESTROY(iter_frame);
+ return ret;
+}
+
+static int
+__afr_lock_heal_synctask(xlator_t *this, afr_private_t *priv, int child)
+{
+ int ret = 0;
+ call_frame_t *frame = NULL;
+ afr_lk_heal_info_t *info = NULL;
+ afr_lk_heal_info_t *tmp = NULL;
+
+ if (priv->shd.iamshd)
+ return 0;
+
+ list_for_each_entry_safe(info, tmp, &priv->saved_locks, pos)
+ {
+ info->child_up_event_gen[child] = priv->event_generation;
+ list_del_init(&info->pos);
+ list_add_tail(&info->pos, &priv->lk_healq);
+ }
+
+ frame = create_frame(this, this->ctx->pool);
+ if (!frame)
+ return -1;
+
+ ret = synctask_new(this->ctx->env, afr_lock_heal, afr_lock_heal_done, frame,
+ frame);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_LK_HEAL_DOM,
+ "Failed to launch lock heal synctask");
+
+ return ret;
+}
+
+static int
+__afr_mark_pending_lk_heal(xlator_t *this, afr_private_t *priv, int child)
+{
+ afr_lk_heal_info_t *info = NULL;
+ afr_lk_heal_info_t *tmp = NULL;
+
+ if (priv->shd.iamshd)
+ return 0;
+ list_for_each_entry_safe(info, tmp, &priv->saved_locks, pos)
+ {
+ info->child_down_event_gen[child] = priv->event_generation;
+ if (info->locked_nodes[child] == 1)
+ info->locked_nodes[child] = 0;
+ if (!afr_has_quorum(info->locked_nodes, this, NULL)) {
+ /* Since the lock was lost on quorum no. of nodes, we should
+ * not attempt to heal it anymore. Some other client could have
+ * acquired the lock, modified data and released it and this
+ * client wouldn't know about it if we heal it.*/
+ afr_mark_fd_bad(info->fd, this);
+ list_del(&info->pos);
+ afr_lk_heal_info_cleanup(info);
+ /* We're not winding an unlock on the node where the lock is still
+ * present because when fencing logic switches over to the new
+ * client (since we marked the fd bad), it should preempt any
+ * existing lock. */
+ }
+ }
+ return 0;
+}
+
+gf_boolean_t
+afr_is_consistent_io_possible(afr_local_t *local, afr_private_t *priv,
+ int32_t *op_errno)
+{
+ if (priv->consistent_io && local->call_count != priv->child_count) {
+ gf_msg(THIS->name, GF_LOG_INFO, 0, AFR_MSG_SUBVOLS_DOWN,
+ "All subvolumes are not up");
+ if (op_errno)
+ *op_errno = ENOTCONN;
+ return _gf_false;
+ }
+ return _gf_true;
+}
+
+gf_boolean_t
+afr_is_lock_mode_mandatory(dict_t *xdata)
+{
+ int ret = 0;
+ uint32_t lk_mode = GF_LK_ADVISORY;
+
+ ret = dict_get_uint32(xdata, GF_LOCK_MODE, &lk_mode);
+ if (!ret && lk_mode == GF_LK_MANDATORY)
+ return _gf_true;
+
+ return _gf_false;
+}
+
+call_frame_t *
+afr_copy_frame(call_frame_t *base)
+{
+ afr_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ int op_errno = 0;
+
+ frame = copy_frame(base);
+ if (!frame)
+ return NULL;
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local) {
+ AFR_STACK_DESTROY(frame);
+ return NULL;
+ }
+
+ return frame;
+}
+
+/* Check if an entry or inode could be undergoing a transaction. */
+gf_boolean_t
+afr_is_possibly_under_txn(afr_transaction_type type, afr_local_t *local,
+ xlator_t *this)
+{
+ int i = 0;
+ int tmp = 0;
+ afr_private_t *priv = NULL;
+ GF_UNUSED char *key = NULL;
+ int keylen = 0;
+
+ priv = this->private;
+
+ if (type == AFR_ENTRY_TRANSACTION) {
+ key = GLUSTERFS_PARENT_ENTRYLK;
+ keylen = SLEN(GLUSTERFS_PARENT_ENTRYLK);
+ } else if (type == AFR_DATA_TRANSACTION) {
+ /*FIXME: Use GLUSTERFS_INODELK_DOM_COUNT etc. once
+ * pl_inodelk_xattr_fill supports separate keys for different
+ * domains.*/
+ key = GLUSTERFS_INODELK_COUNT;
+ keylen = SLEN(GLUSTERFS_INODELK_COUNT);
+ }
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].xdata)
+ continue;
+ if (dict_get_int32n(local->replies[i].xdata, key, keylen, &tmp) == 0)
+ if (tmp)
+ return _gf_true;
+ }
+
+ return _gf_false;
+}
+
+static void
+afr_inode_ctx_destroy(afr_inode_ctx_t *ctx)
+{
+ int i = 0;
+
+ if (!ctx)
+ return;
+
+ for (i = 0; i < AFR_NUM_CHANGE_LOGS; i++) {
+ GF_FREE(ctx->pre_op_done[i]);
+ }
+
+ GF_FREE(ctx);
+}
+
+int
+__afr_inode_ctx_get(xlator_t *this, inode_t *inode, afr_inode_ctx_t **ctx)
+{
+ uint64_t ctx_int = 0;
+ int ret = -1;
+ int i = -1;
+ int num_locks = -1;
+ afr_inode_ctx_t *ictx = NULL;
+ afr_lock_t *lock = NULL;
+ afr_private_t *priv = this->private;
+
+ ret = __inode_ctx_get(inode, this, &ctx_int);
+ if (ret == 0) {
+ *ctx = (afr_inode_ctx_t *)(uintptr_t)ctx_int;
+ return 0;
+ }
+
+ ictx = GF_CALLOC(1, sizeof(afr_inode_ctx_t), gf_afr_mt_inode_ctx_t);
+ if (!ictx)
+ goto out;
+
+ for (i = 0; i < AFR_NUM_CHANGE_LOGS; i++) {
+ ictx->pre_op_done[i] = GF_CALLOC(sizeof *ictx->pre_op_done[i],
+ priv->child_count, gf_afr_mt_int32_t);
+ if (!ictx->pre_op_done[i]) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
+
+ num_locks = sizeof(ictx->lock) / sizeof(afr_lock_t);
+ for (i = 0; i < num_locks; i++) {
+ lock = &ictx->lock[i];
+ INIT_LIST_HEAD(&lock->post_op);
+ INIT_LIST_HEAD(&lock->frozen);
+ INIT_LIST_HEAD(&lock->waiting);
+ INIT_LIST_HEAD(&lock->owners);
+ }
+
+ ctx_int = (uint64_t)(uintptr_t)ictx;
+ ret = __inode_ctx_set(inode, this, &ctx_int);
+ if (ret) {
+ goto out;
+ }
+
+ ictx->spb_choice = -1;
+ ictx->read_subvol = 0;
+ ictx->write_subvol = 0;
+ ictx->lock_count = 0;
+ ret = 0;
+ *ctx = ictx;
+out:
+ if (ret) {
+ afr_inode_ctx_destroy(ictx);
+ }
+ return ret;
+}
+
/*
* INODE CTX 64-bit VALUE FORMAT FOR SMALL (<= 16) SUBVOL COUNTS:
*
@@ -129,1349 +863,2258 @@ out:
*/
int
-__afr_inode_read_subvol_get_small (inode_t *inode, xlator_t *this,
- unsigned char *data, unsigned char *metadata,
- int *event_p)
+__afr_set_in_flight_sb_status(xlator_t *this, afr_local_t *local,
+ inode_t *inode)
{
- afr_private_t *priv = NULL;
- int ret = -1;
- uint16_t datamap = 0;
- uint16_t metadatamap = 0;
- uint32_t event = 0;
- uint64_t val = 0;
- int i = 0;
- afr_inode_ctx_t *ctx = NULL;
-
- priv = this->private;
+ int i = 0;
+ int txn_type = 0;
+ int count = 0;
+ int index = -1;
+ uint16_t datamap_old = 0;
+ uint16_t metadatamap_old = 0;
+ uint16_t datamap = 0;
+ uint16_t metadatamap = 0;
+ uint16_t tmp_map = 0;
+ uint16_t mask = 0;
+ uint32_t event = 0;
+ uint64_t val = 0;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ txn_type = local->transaction.type;
+
+ if (txn_type == AFR_DATA_TRANSACTION)
+ val = local->inode_ctx->write_subvol;
+ else
+ val = local->inode_ctx->read_subvol;
+
+ metadatamap_old = metadatamap = (val & 0x000000000000ffff);
+ datamap_old = datamap = (val & 0x00000000ffff0000) >> 16;
+ event = (val & 0xffffffff00000000) >> 32;
+
+ if (txn_type == AFR_DATA_TRANSACTION)
+ tmp_map = datamap;
+ else if (txn_type == AFR_METADATA_TRANSACTION)
+ tmp_map = metadatamap;
+
+ count = gf_bits_count(tmp_map);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->transaction.failed_subvols[i])
+ continue;
+
+ mask = 1 << i;
+ if (txn_type == AFR_METADATA_TRANSACTION)
+ metadatamap &= ~mask;
+ else if (txn_type == AFR_DATA_TRANSACTION)
+ datamap &= ~mask;
+ }
+
+ switch (txn_type) {
+ case AFR_METADATA_TRANSACTION:
+ if ((metadatamap_old != 0) && (metadatamap == 0) && (count == 1)) {
+ index = gf_bits_index(tmp_map);
+ local->transaction.in_flight_sb_errno = local->replies[index]
+ .op_errno;
+ local->transaction.in_flight_sb = _gf_true;
+ metadatamap |= (1 << index);
+ }
+ if (metadatamap_old != metadatamap) {
+ __afr_inode_need_refresh_set(inode, this);
+ }
+ break;
+
+ case AFR_DATA_TRANSACTION:
+ if ((datamap_old != 0) && (datamap == 0) && (count == 1)) {
+ index = gf_bits_index(tmp_map);
+ local->transaction.in_flight_sb_errno = local->replies[index]
+ .op_errno;
+ local->transaction.in_flight_sb = _gf_true;
+ datamap |= (1 << index);
+ }
+ if (datamap_old != datamap)
+ __afr_inode_need_refresh_set(inode, this);
+ break;
- ret = __afr_inode_ctx_get (this, inode, &ctx);
- if (ret < 0)
- return ret;
-
- val = ctx->read_subvol;
+ default:
+ break;
+ }
- metadatamap = (val & 0x000000000000ffff);
- datamap = (val & 0x00000000ffff0000) >> 16;
- event = (val & 0xffffffff00000000) >> 32;
+ val = ((uint64_t)metadatamap) | (((uint64_t)datamap) << 16) |
+ (((uint64_t)event) << 32);
- for (i = 0; i < priv->child_count; i++) {
- if (metadata)
- metadata[i] = (metadatamap >> i) & 1;
- if (data)
- data[i] = (datamap >> i) & 1;
- }
+ if (txn_type == AFR_DATA_TRANSACTION)
+ local->inode_ctx->write_subvol = val;
+ local->inode_ctx->read_subvol = val;
- if (event_p)
- *event_p = event;
- return ret;
+ return 0;
}
+gf_boolean_t
+afr_is_symmetric_error(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int op_errno = 0;
+ int i_errno = 0;
+ gf_boolean_t matching_errors = _gf_true;
+ int i = 0;
+
+ priv = this->private;
+ local = frame->local;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
+ if (local->replies[i].op_ret != -1) {
+ /* Operation succeeded on at least one subvol,
+ so it is not a failed-everywhere situation.
+ */
+ matching_errors = _gf_false;
+ break;
+ }
+ i_errno = local->replies[i].op_errno;
+
+ if (i_errno == ENOTCONN) {
+ /* ENOTCONN is not a symmetric error. We do not
+ know if the operation was performed on the
+ backend or not.
+ */
+ matching_errors = _gf_false;
+ break;
+ }
+
+ if (!op_errno) {
+ op_errno = i_errno;
+ } else if (op_errno != i_errno) {
+ /* Mismatching op_errno's */
+ matching_errors = _gf_false;
+ break;
+ }
+ }
+
+ return matching_errors;
+}
int
-__afr_inode_read_subvol_set_small (inode_t *inode, xlator_t *this,
- unsigned char *data, unsigned char *metadata,
- int event)
+afr_set_in_flight_sb_status(xlator_t *this, call_frame_t *frame, inode_t *inode)
{
- afr_private_t *priv = NULL;
- uint16_t datamap = 0;
- uint16_t metadatamap = 0;
- uint64_t val = 0;
- int i = 0;
- int ret = -1;
- afr_inode_ctx_t *ctx = NULL;
+ int ret = -1;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
- priv = this->private;
+ priv = this->private;
+ local = frame->local;
- ret = __afr_inode_ctx_get (this, inode, &ctx);
- if (ret)
- goto out;
+ /* If this transaction saw no failures, then exit. */
+ if (AFR_COUNT(local->transaction.failed_subvols, priv->child_count) == 0)
+ return 0;
- for (i = 0; i < priv->child_count; i++) {
- if (data[i])
- datamap |= (1 << i);
- if (metadata[i])
- metadatamap |= (1 << i);
- }
+ if (afr_is_symmetric_error(frame, this))
+ return 0;
- val = ((uint64_t) metadatamap) |
- (((uint64_t) datamap) << 16) |
- (((uint64_t) event) << 32);
+ LOCK(&inode->lock);
+ {
+ ret = __afr_set_in_flight_sb_status(this, local, inode);
+ }
+ UNLOCK(&inode->lock);
- ctx->read_subvol = val;
+ return ret;
+}
- ret = 0;
-out:
+int
+__afr_inode_read_subvol_get_small(inode_t *inode, xlator_t *this,
+ unsigned char *data, unsigned char *metadata,
+ int *event_p)
+{
+ afr_private_t *priv = NULL;
+ int ret = -1;
+ uint16_t datamap = 0;
+ uint16_t metadatamap = 0;
+ uint32_t event = 0;
+ uint64_t val = 0;
+ int i = 0;
+ afr_inode_ctx_t *ctx = NULL;
+
+ priv = this->private;
+
+ ret = __afr_inode_ctx_get(this, inode, &ctx);
+ if (ret < 0)
return ret;
+
+ val = ctx->read_subvol;
+
+ metadatamap = (val & 0x000000000000ffff);
+ datamap = (val & 0x00000000ffff0000) >> 16;
+ event = (val & 0xffffffff00000000) >> 32;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (metadata)
+ metadata[i] = (metadatamap >> i) & 1;
+ if (data)
+ data[i] = (datamap >> i) & 1;
+ }
+
+ if (event_p)
+ *event_p = event;
+ return ret;
}
int
-__afr_inode_read_subvol_reset_small (inode_t *inode, xlator_t *this)
+__afr_inode_read_subvol_set_small(inode_t *inode, xlator_t *this,
+ unsigned char *data, unsigned char *metadata,
+ int event)
{
- int ret = -1;
- uint16_t datamap = 0;
- uint16_t metadatamap = 0;
- uint32_t event = 0;
- uint64_t val = 0;
- afr_inode_ctx_t *ctx = NULL;
+ afr_private_t *priv = NULL;
+ uint16_t datamap = 0;
+ uint16_t metadatamap = 0;
+ uint64_t val = 0;
+ int i = 0;
+ int ret = -1;
+ afr_inode_ctx_t *ctx = NULL;
- ret = __afr_inode_ctx_get (this, inode, &ctx);
- if (ret)
- return ret;
+ priv = this->private;
- val = ctx->read_subvol;
+ ret = __afr_inode_ctx_get(this, inode, &ctx);
+ if (ret)
+ goto out;
- metadatamap = (val & 0x000000000000ffff) >> 0;
- datamap = (val & 0x00000000ffff0000) >> 16;
- event = 0;
+ for (i = 0; i < priv->child_count; i++) {
+ if (data[i])
+ datamap |= (1 << i);
+ if (metadata[i])
+ metadatamap |= (1 << i);
+ }
- val = ((uint64_t) metadatamap) |
- (((uint64_t) datamap) << 16) |
- (((uint64_t) event) << 32);
+ val = ((uint64_t)metadatamap) | (((uint64_t)datamap) << 16) |
+ (((uint64_t)event) << 32);
- ctx->read_subvol = val;
+ ctx->read_subvol = val;
- return ret;
+ ret = 0;
+out:
+ return ret;
}
-
int
-__afr_inode_read_subvol_get (inode_t *inode, xlator_t *this,
- unsigned char *data, unsigned char *metadata,
- int *event_p)
+__afr_inode_read_subvol_get(inode_t *inode, xlator_t *this, unsigned char *data,
+ unsigned char *metadata, int *event_p)
{
- afr_private_t *priv = NULL;
- int ret = -1;
+ afr_private_t *priv = NULL;
+ int ret = -1;
- priv = this->private;
+ priv = this->private;
- if (priv->child_count <= 16)
- ret = __afr_inode_read_subvol_get_small (inode, this, data,
- metadata, event_p);
- else
- /* TBD: allocate structure with array and read from it */
- ret = -1;
+ if (priv->child_count <= 16)
+ ret = __afr_inode_read_subvol_get_small(inode, this, data, metadata,
+ event_p);
+ else
+ /* TBD: allocate structure with array and read from it */
+ ret = -1;
- return ret;
+ return ret;
}
int
-__afr_inode_split_brain_choice_get (inode_t *inode, xlator_t *this,
- int *spb_choice)
+__afr_inode_split_brain_choice_get(inode_t *inode, xlator_t *this,
+ int *spb_choice)
{
- afr_inode_ctx_t *ctx = NULL;
- int ret = -1;
+ afr_inode_ctx_t *ctx = NULL;
+ int ret = -1;
- ret = __afr_inode_ctx_get (this, inode, &ctx);
- if (ret < 0)
- return ret;
+ ret = __afr_inode_ctx_get(this, inode, &ctx);
+ if (ret < 0)
+ return ret;
- *spb_choice = ctx->spb_choice;
- return 0;
+ *spb_choice = ctx->spb_choice;
+ return 0;
}
int
-__afr_inode_read_subvol_set (inode_t *inode, xlator_t *this, unsigned char *data,
- unsigned char *metadata, int event)
+__afr_inode_read_subvol_set(inode_t *inode, xlator_t *this, unsigned char *data,
+ unsigned char *metadata, int event)
{
- afr_private_t *priv = NULL;
- int ret = -1;
+ afr_private_t *priv = NULL;
+ int ret = -1;
- priv = this->private;
+ priv = this->private;
- if (priv->child_count <= 16)
- ret = __afr_inode_read_subvol_set_small (inode, this, data,
- metadata, event);
- else
- ret = -1;
+ if (priv->child_count <= 16)
+ ret = __afr_inode_read_subvol_set_small(inode, this, data, metadata,
+ event);
+ else
+ ret = -1;
- return ret;
+ return ret;
}
int
-__afr_inode_split_brain_choice_set (inode_t *inode, xlator_t *this,
- int spb_choice)
+__afr_inode_split_brain_choice_set(inode_t *inode, xlator_t *this,
+ int spb_choice)
{
- afr_inode_ctx_t *ctx = NULL;
- int ret = -1;
+ afr_inode_ctx_t *ctx = NULL;
+ int ret = -1;
- ret = __afr_inode_ctx_get (this, inode, &ctx);
- if (ret)
- goto out;
+ ret = __afr_inode_ctx_get(this, inode, &ctx);
+ if (ret)
+ goto out;
- ctx->spb_choice = spb_choice;
+ ctx->spb_choice = spb_choice;
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
}
int
-__afr_inode_read_subvol_reset (inode_t *inode, xlator_t *this)
+afr_inode_read_subvol_get(inode_t *inode, xlator_t *this, unsigned char *data,
+ unsigned char *metadata, int *event_p)
{
- afr_private_t *priv = NULL;
- int ret = -1;
+ int ret = -1;
- priv = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
- if (priv->child_count <= 16)
- ret = __afr_inode_read_subvol_reset_small (inode, this);
- else
- ret = -1;
+ LOCK(&inode->lock);
+ {
+ ret = __afr_inode_read_subvol_get(inode, this, data, metadata, event_p);
+ }
+ UNLOCK(&inode->lock);
+out:
+ return ret;
+}
- return ret;
+int
+afr_inode_get_readable(call_frame_t *frame, inode_t *inode, xlator_t *this,
+ unsigned char *readable, int *event_p, int type)
+{
+ afr_private_t *priv = this->private;
+ afr_local_t *local = frame->local;
+ unsigned char *data = alloca0(priv->child_count);
+ unsigned char *metadata = alloca0(priv->child_count);
+ int data_count = 0;
+ int metadata_count = 0;
+ int event_generation = 0;
+ int ret = 0;
+
+ ret = afr_inode_read_subvol_get(inode, this, data, metadata,
+ &event_generation);
+ if (ret == -1)
+ return -EIO;
+
+ data_count = AFR_COUNT(data, priv->child_count);
+ metadata_count = AFR_COUNT(metadata, priv->child_count);
+
+ if (inode->ia_type == IA_IFDIR) {
+ /* For directories, allow even if it is in data split-brain. */
+ if (type == AFR_METADATA_TRANSACTION || local->op == GF_FOP_STAT ||
+ local->op == GF_FOP_FSTAT) {
+ if (!metadata_count)
+ return -EIO;
+ }
+ } else {
+ /* For files, abort in case of data/metadata split-brain. */
+ if (!data_count || !metadata_count) {
+ return -EIO;
+ }
+ }
+
+ if (type == AFR_METADATA_TRANSACTION && readable)
+ memcpy(readable, metadata, priv->child_count * sizeof *metadata);
+ if (type == AFR_DATA_TRANSACTION && readable) {
+ if (!data_count)
+ memcpy(readable, local->child_up,
+ priv->child_count * sizeof *readable);
+ else
+ memcpy(readable, data, priv->child_count * sizeof *data);
+ }
+ if (event_p)
+ *event_p = event_generation;
+ return 0;
}
+static int
+afr_inode_split_brain_choice_get(inode_t *inode, xlator_t *this,
+ int *spb_choice)
+{
+ int ret = -1;
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
+
+ LOCK(&inode->lock);
+ {
+ ret = __afr_inode_split_brain_choice_get(inode, this, spb_choice);
+ }
+ UNLOCK(&inode->lock);
+out:
+ return ret;
+}
+/*
+ * frame is used to get the favourite policy. Since
+ * afr_inode_split_brain_choice_get was called with afr_open, it is possible to
+ * have a frame with out local->replies. So in that case, frame is passed as
+ * null, hence this function will handle the frame NULL case.
+ */
int
-afr_inode_read_subvol_get (inode_t *inode, xlator_t *this, unsigned char *data,
- unsigned char *metadata, int *event_p)
+afr_split_brain_read_subvol_get(inode_t *inode, xlator_t *this,
+ call_frame_t *frame, int *spb_subvol)
{
- int ret = -1;
+ int ret = -1;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ GF_VALIDATE_OR_GOTO("afr", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
+ GF_VALIDATE_OR_GOTO(this->name, spb_subvol, out);
- GF_VALIDATE_OR_GOTO (this->name, inode, out);
+ priv = this->private;
+
+ ret = afr_inode_split_brain_choice_get(inode, this, spb_subvol);
+ if (*spb_subvol < 0 && priv->fav_child_policy && frame && frame->local) {
+ local = frame->local;
+ *spb_subvol = afr_sh_get_fav_by_policy(this, local->replies, inode,
+ NULL);
+ if (*spb_subvol >= 0) {
+ ret = 0;
+ }
+ }
- LOCK(&inode->lock);
- {
- ret = __afr_inode_read_subvol_get (inode, this, data,
- metadata, event_p);
- }
- UNLOCK(&inode->lock);
out:
- return ret;
+ return ret;
}
-
int
-afr_inode_split_brain_choice_get (inode_t *inode, xlator_t *this,
- int *spb_choice)
+afr_inode_read_subvol_set(inode_t *inode, xlator_t *this, unsigned char *data,
+ unsigned char *metadata, int event)
{
- int ret = -1;
+ int ret = -1;
- GF_VALIDATE_OR_GOTO (this->name, inode, out);
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
- LOCK(&inode->lock);
- {
- ret = __afr_inode_split_brain_choice_get (inode, this,
- spb_choice);
- }
- UNLOCK(&inode->lock);
+ LOCK(&inode->lock);
+ {
+ ret = __afr_inode_read_subvol_set(inode, this, data, metadata, event);
+ }
+ UNLOCK(&inode->lock);
out:
- return ret;
+ return ret;
}
-
int
-afr_inode_read_subvol_set (inode_t *inode, xlator_t *this, unsigned char *data,
- unsigned char *metadata, int event)
+afr_inode_split_brain_choice_set(inode_t *inode, xlator_t *this, int spb_choice)
{
- int ret = -1;
+ int ret = -1;
- GF_VALIDATE_OR_GOTO (this->name, inode, out);
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
- LOCK(&inode->lock);
- {
- ret = __afr_inode_read_subvol_set (inode, this, data, metadata,
- event);
- }
- UNLOCK(&inode->lock);
+ LOCK(&inode->lock);
+ {
+ ret = __afr_inode_split_brain_choice_set(inode, this, spb_choice);
+ }
+ UNLOCK(&inode->lock);
out:
- return ret;
+ return ret;
}
-
-int
-afr_inode_split_brain_choice_set (inode_t *inode, xlator_t *this,
- int spb_choice)
+/* The caller of this should perform afr_inode_refresh, if this function
+ * returns _gf_true
+ */
+gf_boolean_t
+afr_is_inode_refresh_reqd(inode_t *inode, xlator_t *this, int event_gen1,
+ int event_gen2)
{
- int ret = -1;
+ gf_boolean_t need_refresh = _gf_false;
+ afr_inode_ctx_t *ctx = NULL;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
- GF_VALIDATE_OR_GOTO (this->name, inode, out);
+ LOCK(&inode->lock);
+ {
+ ret = __afr_inode_ctx_get(this, inode, &ctx);
+ if (ret)
+ goto unlock;
- LOCK(&inode->lock);
- {
- ret = __afr_inode_split_brain_choice_set (inode, this,
- spb_choice);
- }
- UNLOCK(&inode->lock);
+ need_refresh = ctx->need_refresh;
+ /* Hoping that the caller will do inode_refresh followed by
+ * this, hence setting the need_refresh to false */
+ ctx->need_refresh = _gf_false;
+ }
+unlock:
+ UNLOCK(&inode->lock);
+
+ if (event_gen1 != event_gen2)
+ need_refresh = _gf_true;
out:
- return ret;
+ return need_refresh;
}
+int
+__afr_inode_need_refresh_set(inode_t *inode, xlator_t *this)
+{
+ int ret = -1;
+ afr_inode_ctx_t *ctx = NULL;
+
+ ret = __afr_inode_ctx_get(this, inode, &ctx);
+ if (ret == 0) {
+ ctx->need_refresh = _gf_true;
+ }
+
+ return ret;
+}
int
-afr_inode_read_subvol_reset (inode_t *inode, xlator_t *this)
+afr_inode_need_refresh_set(inode_t *inode, xlator_t *this)
{
- int ret = -1;
+ int ret = -1;
- GF_VALIDATE_OR_GOTO (this->name, inode, out);
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
- LOCK(&inode->lock);
- {
- ret = __afr_inode_read_subvol_reset (inode, this);
- }
- UNLOCK(&inode->lock);
+ LOCK(&inode->lock);
+ {
+ ret = __afr_inode_need_refresh_set(inode, this);
+ }
+ UNLOCK(&inode->lock);
out:
- return ret;
+ return ret;
}
-
int
-afr_accused_fill (xlator_t *this, dict_t *xdata, unsigned char *accused,
- afr_transaction_type type)
+afr_spb_choice_timeout_cancel(xlator_t *this, inode_t *inode)
{
- afr_private_t *priv = NULL;
- int i = 0;
- int idx = afr_index_for_transaction_type (type);
- void *pending_raw = NULL;
- int pending[3];
- int ret = 0;
+ afr_inode_ctx_t *ctx = NULL;
+ int ret = -1;
- priv = this->private;
+ if (!inode)
+ return ret;
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_get_ptr (xdata, priv->pending_key[i],
- &pending_raw);
- if (ret) /* no pending flags */
- continue;
- memcpy (pending, pending_raw, sizeof(pending));
+ LOCK(&inode->lock);
+ {
+ ret = __afr_inode_ctx_get(this, inode, &ctx);
+ if (ret < 0 || !ctx) {
+ UNLOCK(&inode->lock);
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR,
+ "Failed to cancel split-brain choice timer.");
+ goto out;
+ }
+ ctx->spb_choice = -1;
+ if (ctx->timer) {
+ gf_timer_call_cancel(this->ctx, ctx->timer);
+ ctx->timer = NULL;
+ }
+ ret = 0;
+ }
+ UNLOCK(&inode->lock);
+out:
+ return ret;
+}
- if (ntoh32 (pending[idx]))
- accused[i] = 1;
- }
+void
+afr_set_split_brain_choice_cbk(void *data)
+{
+ inode_t *inode = data;
+ xlator_t *this = THIS;
- return 0;
+ afr_spb_choice_timeout_cancel(this, inode);
+ inode_invalidate(inode);
+ inode_unref(inode);
+ return;
}
-
int
-afr_accuse_smallfiles (xlator_t *this, struct afr_reply *replies,
- unsigned char *data_accused)
+afr_set_split_brain_choice(int ret, call_frame_t *frame, void *opaque)
{
- int i = 0;
- afr_private_t *priv = NULL;
- uint64_t maxsize = 0;
+ int op_errno = ENOMEM;
+ afr_private_t *priv = NULL;
+ afr_inode_ctx_t *ctx = NULL;
+ inode_t *inode = NULL;
+ loc_t *loc = NULL;
+ xlator_t *this = NULL;
+ afr_spbc_timeout_t *data = opaque;
+ struct timespec delta = {
+ 0,
+ };
+ gf_boolean_t timer_set = _gf_false;
+ gf_boolean_t timer_cancelled = _gf_false;
+ gf_boolean_t timer_reset = _gf_false;
+ int old_spb_choice = -1;
+
+ frame = data->frame;
+ loc = data->loc;
+ this = frame->this;
+ priv = this->private;
+
+ if (ret) {
+ op_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+
+ delta.tv_sec = priv->spb_choice_timeout;
+ delta.tv_nsec = 0;
+
+ if (!loc->inode) {
+ ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ if (!(data->d_spb || data->m_spb)) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR,
+ "Cannot set "
+ "replica.split-brain-choice on %s. File is"
+ " not in data/metadata split-brain.",
+ uuid_utoa(loc->gfid));
+ ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ /*
+ * we're ref'ing the inode before LOCK like it is done elsewhere in the
+ * code. If we ref after LOCK, coverity complains of possible deadlocks.
+ */
+ inode = inode_ref(loc->inode);
+
+ LOCK(&inode->lock);
+ {
+ ret = __afr_inode_ctx_get(this, inode, &ctx);
+ if (ret) {
+ UNLOCK(&inode->lock);
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR,
+ "Failed to get inode_ctx for %s", loc->name);
+ goto post_unlock;
+ }
- priv = this->private;
+ old_spb_choice = ctx->spb_choice;
+ ctx->spb_choice = data->spb_child_index;
- for (i = 0; i < priv->child_count; i++) {
- if (data_accused[i])
- continue;
- if (replies[i].poststat.ia_size > maxsize)
- maxsize = replies[i].poststat.ia_size;
- }
+ /* Possible changes in spb-choice :
+ * valid to -1 : cancel timer and unref
+ * valid to valid : cancel timer and inject new one
+ * -1 to -1 : unref and do not do anything
+ * -1 to valid : inject timer
+ */
- for (i = 0; i < priv->child_count; i++) {
- if (data_accused[i])
- continue;
- if (replies[i].poststat.ia_size < maxsize)
- data_accused[i] = 1;
- }
+ /* ctx->timer is NULL iff previous value of
+ * ctx->spb_choice is -1
+ */
+ if (ctx->timer) {
+ if (ctx->spb_choice == -1) {
+ if (!gf_timer_call_cancel(this->ctx, ctx->timer)) {
+ ctx->timer = NULL;
+ timer_cancelled = _gf_true;
+ }
+ /* If timer cancel failed here it means that the
+ * previous cbk will be executed which will set
+ * spb_choice to -1. So we can consider the
+ * 'valid to -1' case to be a success
+ * (i.e. ret = 0) and goto unlock.
+ */
+ goto unlock;
+ }
+ goto reset_timer;
+ } else {
+ if (ctx->spb_choice == -1)
+ goto unlock;
+ goto set_timer;
+ }
- return 0;
+ reset_timer:
+ ret = gf_timer_call_cancel(this->ctx, ctx->timer);
+ if (ret != 0) {
+ /* We need to bail out now instead of launching a new
+ * timer. Otherwise the cbk of the previous timer event
+ * will cancel the new ctx->timer.
+ */
+ ctx->spb_choice = old_spb_choice;
+ ret = -1;
+ op_errno = EAGAIN;
+ goto unlock;
+ }
+ ctx->timer = NULL;
+ timer_reset = _gf_true;
+
+ set_timer:
+ ctx->timer = gf_timer_call_after(this->ctx, delta,
+ afr_set_split_brain_choice_cbk, inode);
+ if (!ctx->timer) {
+ ctx->spb_choice = old_spb_choice;
+ ret = -1;
+ op_errno = ENOMEM;
+ }
+ if (!timer_reset && ctx->timer)
+ timer_set = _gf_true;
+ if (timer_reset && !ctx->timer)
+ timer_cancelled = _gf_true;
+ }
+unlock:
+ UNLOCK(&inode->lock);
+post_unlock:
+ if (!timer_set)
+ inode_unref(inode);
+ if (timer_cancelled)
+ inode_unref(inode);
+ /*
+ * We need to invalidate the inode to prevent the kernel from serving
+ * reads from an older cached value despite a change in spb_choice to
+ * a new value.
+ */
+ inode_invalidate(inode);
+out:
+ GF_FREE(data);
+ AFR_STACK_UNWIND(setxattr, frame, ret, op_errno, NULL);
+ return 0;
}
-
int
-afr_replies_interpret (call_frame_t *frame, xlator_t *this, inode_t *inode)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- struct afr_reply *replies = NULL;
- int event_generation = 0;
- int i = 0;
- unsigned char *data_accused = NULL;
- unsigned char *metadata_accused = NULL;
- unsigned char *data_readable = NULL;
- unsigned char *metadata_readable = NULL;
- int ret = 0;
-
- local = frame->local;
- priv = this->private;
- replies = local->replies;
- event_generation = local->event_generation;
-
- data_accused = alloca0 (priv->child_count);
- data_readable = alloca0 (priv->child_count);
- metadata_accused = alloca0 (priv->child_count);
- metadata_readable = alloca0 (priv->child_count);
-
- for (i = 0; i < priv->child_count; i++) {
- data_readable[i] = 1;
- metadata_readable[i] = 1;
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (!replies[i].valid) {
- data_readable[i] = 0;
- metadata_readable[i] = 0;
- continue;
- }
-
- if (replies[i].op_ret == -1) {
- data_readable[i] = 0;
- metadata_readable[i] = 0;
- continue;
- }
-
- afr_accused_fill (this, replies[i].xdata, data_accused,
- (replies[i].poststat.ia_type == IA_IFDIR) ?
- AFR_ENTRY_TRANSACTION : AFR_DATA_TRANSACTION);
-
- afr_accused_fill (this, replies[i].xdata,
- metadata_accused, AFR_METADATA_TRANSACTION);
-
- }
-
- if (inode->ia_type != IA_IFDIR)
- afr_accuse_smallfiles (this, replies, data_accused);
-
- for (i = 0; i < priv->child_count; i++) {
- if (data_accused[i]) {
- data_readable[i] = 0;
- ret = 1;
- }
- if (metadata_accused[i]) {
- metadata_readable[i] = 0;
- ret = 1;
- }
- }
-
- afr_inode_read_subvol_set (inode, this, data_readable,
- metadata_readable, event_generation);
- return ret;
+afr_accused_fill(xlator_t *this, dict_t *xdata, unsigned char *accused,
+ afr_transaction_type type)
+{
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int idx = afr_index_for_transaction_type(type);
+ void *pending_raw = NULL;
+ int pending[3];
+ int ret = 0;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ ret = dict_get_ptr(xdata, priv->pending_key[i], &pending_raw);
+ if (ret) /* no pending flags */
+ continue;
+ memcpy(pending, pending_raw, sizeof(pending));
+
+ if (ntoh32(pending[idx]))
+ accused[i] = 1;
+ }
+
+ return 0;
}
-
-
int
-afr_refresh_selfheal_done (int ret, call_frame_t *heal, void *opaque)
+afr_accuse_smallfiles(xlator_t *this, struct afr_reply *replies,
+ unsigned char *data_accused)
{
- if (heal)
- STACK_DESTROY (heal->root);
- return 0;
+ int i = 0;
+ afr_private_t *priv = NULL;
+ uint64_t maxsize = 0;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (replies[i].valid && replies[i].xdata &&
+ dict_get_sizen(replies[i].xdata, GLUSTERFS_BAD_INODE))
+ continue;
+ if (data_accused[i])
+ continue;
+ if (replies[i].poststat.ia_size > maxsize)
+ maxsize = replies[i].poststat.ia_size;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (data_accused[i])
+ continue;
+ if (AFR_IS_ARBITER_BRICK(priv, i))
+ continue;
+ if (replies[i].poststat.ia_size < maxsize)
+ data_accused[i] = 1;
+ }
+
+ return 0;
}
int
-afr_inode_refresh_err (call_frame_t *frame, xlator_t *this)
+afr_readables_fill(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ unsigned char *data_accused, unsigned char *metadata_accused,
+ unsigned char *data_readable,
+ unsigned char *metadata_readable, struct afr_reply *replies)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- int err = 0;
-
- local = frame->local;
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->replies[i].valid && !local->replies[i].op_ret) {
- err = 0;
- goto ret;
- }
- }
-
- err = afr_final_errno (local, priv);
-ret:
- return -err;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ dict_t *xdata = NULL;
+ int i = 0;
+ int ret = 0;
+ ia_type_t ia_type = IA_INVAL;
+
+ local = frame->local;
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ data_readable[i] = 1;
+ metadata_readable[i] = 1;
+ }
+ if (AFR_IS_ARBITER_BRICK(priv, ARBITER_BRICK_INDEX)) {
+ data_readable[ARBITER_BRICK_INDEX] = 0;
+ metadata_readable[ARBITER_BRICK_INDEX] = 0;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (replies) { /* Lookup */
+ if (!replies[i].valid || replies[i].op_ret == -1 ||
+ (replies[i].xdata &&
+ dict_get_sizen(replies[i].xdata, GLUSTERFS_BAD_INODE))) {
+ data_readable[i] = 0;
+ metadata_readable[i] = 0;
+ continue;
+ }
+
+ xdata = replies[i].xdata;
+ ia_type = replies[i].poststat.ia_type;
+ } else { /* pre-op xattrop */
+ xdata = local->transaction.changelog_xdata[i];
+ ia_type = inode->ia_type;
+ }
+
+ if (!xdata)
+ continue; /* mkdir_cbk sends NULL xdata_rsp. */
+ afr_accused_fill(this, xdata, data_accused,
+ (ia_type == IA_IFDIR) ? AFR_ENTRY_TRANSACTION
+ : AFR_DATA_TRANSACTION);
+
+ afr_accused_fill(this, xdata, metadata_accused,
+ AFR_METADATA_TRANSACTION);
+ }
+
+ if (replies && ia_type != IA_INVAL && ia_type != IA_IFDIR &&
+ /* We want to accuse small files only when we know for
+ * sure that there is no IO happening. Otherwise, the
+ * ia_sizes obtained in post-refresh replies may
+ * mismatch due to a race between inode-refresh and
+ * ongoing writes, causing spurious heal launches*/
+ !afr_is_possibly_under_txn(AFR_DATA_TRANSACTION, local, this)) {
+ afr_accuse_smallfiles(this, replies, data_accused);
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (data_accused[i]) {
+ data_readable[i] = 0;
+ ret = 1;
+ }
+ if (metadata_accused[i]) {
+ metadata_readable[i] = 0;
+ ret = 1;
+ }
+ }
+ return ret;
}
-
int
-afr_refresh_selfheal_wrap (void *opaque)
+afr_replies_interpret(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ gf_boolean_t *start_heal)
{
- call_frame_t *frame = opaque;
- afr_local_t *local = NULL;
- xlator_t *this = NULL;
- int err = 0;
-
- local = frame->local;
- this = frame->this;
-
- afr_selfheal (frame->this, local->refreshinode->gfid);
-
- afr_selfheal_unlocked_discover (frame, local->refreshinode,
- local->refreshinode->gfid,
- local->replies);
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ struct afr_reply *replies = NULL;
+ int event_generation = 0;
+ int i = 0;
+ unsigned char *data_accused = NULL;
+ unsigned char *metadata_accused = NULL;
+ unsigned char *data_readable = NULL;
+ unsigned char *metadata_readable = NULL;
+ int ret = 0;
+
+ local = frame->local;
+ priv = this->private;
+ replies = local->replies;
+ event_generation = local->event_generation;
+
+ data_accused = alloca0(priv->child_count);
+ data_readable = alloca0(priv->child_count);
+ metadata_accused = alloca0(priv->child_count);
+ metadata_readable = alloca0(priv->child_count);
+
+ ret = afr_readables_fill(frame, this, inode, data_accused, metadata_accused,
+ data_readable, metadata_readable, replies);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (start_heal && priv->child_up[i] &&
+ (data_accused[i] || metadata_accused[i])) {
+ *start_heal = _gf_true;
+ break;
+ }
+ }
+ afr_inode_read_subvol_set(inode, this, data_readable, metadata_readable,
+ event_generation);
+ return ret;
+}
- afr_replies_interpret (frame, this, local->refreshinode);
+int
+afr_refresh_selfheal_done(int ret, call_frame_t *heal, void *opaque)
+{
+ if (heal)
+ AFR_STACK_DESTROY(heal);
+ return 0;
+}
- err = afr_inode_refresh_err (frame, this);
+int
+afr_inode_refresh_err(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int err = 0;
- afr_local_replies_wipe (local, this->private);
+ local = frame->local;
+ priv = this->private;
- local->refreshfn (frame, this, err);
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].valid && !local->replies[i].op_ret) {
+ err = 0;
+ goto ret;
+ }
+ }
- return 0;
+ err = afr_final_errno(local, priv);
+ret:
+ return err;
}
-
gf_boolean_t
-afr_selfheal_enabled (xlator_t *this)
+afr_selfheal_enabled(const xlator_t *this)
{
- afr_private_t *priv = NULL;
- gf_boolean_t data = _gf_false;
- int ret = 0;
-
- priv = this->private;
-
- ret = gf_string2boolean (priv->data_self_heal, &data);
- GF_ASSERT (!ret);
+ const afr_private_t *priv = this->private;
- return data || priv->metadata_self_heal || priv->entry_self_heal;
+ return priv->data_self_heal || priv->metadata_self_heal ||
+ priv->entry_self_heal;
}
-
int
-afr_inode_refresh_done (call_frame_t *frame, xlator_t *this)
+afr_txn_refresh_done(call_frame_t *frame, xlator_t *this, int err)
{
- call_frame_t *heal = NULL;
- afr_local_t *local = NULL;
- int ret = 0;
- int err = 0;
-
- local = frame->local;
-
- ret = afr_replies_interpret (frame, this, local->refreshinode);
-
- err = afr_inode_refresh_err (frame, this);
-
- afr_local_replies_wipe (local, this->private);
+ call_frame_t *heal_frame = NULL;
+ afr_local_t *heal_local = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ inode_t *inode = NULL;
+ int event_generation = 0;
+ int read_subvol = -1;
+ int ret = 0;
+
+ local = frame->local;
+ inode = local->inode;
+ priv = this->private;
+
+ if (err)
+ goto refresh_done;
+
+ if (local->op == GF_FOP_LOOKUP)
+ goto refresh_done;
+
+ ret = afr_inode_get_readable(frame, inode, this, local->readable,
+ &event_generation, local->transaction.type);
+
+ if (ret == -EIO) {
+ /* No readable subvolume even after refresh ==> splitbrain.*/
+ if (!priv->fav_child_policy) {
+ err = EIO;
+ goto refresh_done;
+ }
+ read_subvol = afr_sh_get_fav_by_policy(this, local->replies, inode,
+ NULL);
+ if (read_subvol == -1) {
+ err = EIO;
+ goto refresh_done;
+ }
+
+ heal_frame = afr_frame_create(this, NULL);
+ if (!heal_frame) {
+ err = EIO;
+ goto refresh_done;
+ }
+ heal_local = heal_frame->local;
+ heal_local->xdata_req = dict_new();
+ if (!heal_local->xdata_req) {
+ err = EIO;
+ AFR_STACK_DESTROY(heal_frame);
+ goto refresh_done;
+ }
+ heal_local->heal_frame = frame;
+ ret = synctask_new(this->ctx->env, afr_fav_child_reset_sink_xattrs,
+ afr_fav_child_reset_sink_xattrs_cbk, heal_frame,
+ heal_frame);
+ return 0;
+ }
- if (ret && afr_selfheal_enabled (this)) {
- heal = copy_frame (frame);
- if (heal)
- heal->root->pid = GF_CLIENT_PID_AFR_SELF_HEALD;
- ret = synctask_new (this->ctx->env, afr_refresh_selfheal_wrap,
- afr_refresh_selfheal_done, heal, frame);
- if (ret)
- goto refresh_done;
- } else {
- refresh_done:
- local->refreshfn (frame, this, err);
- }
+refresh_done:
+ afr_local_replies_wipe(local, this->private);
+ local->refreshfn(frame, this, err);
- return 0;
+ return 0;
}
-
int
-afr_inode_refresh_subvol_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, inode_t *inode,
- struct iatt *buf, dict_t *xdata, struct iatt *par)
+afr_inode_refresh_done(call_frame_t *frame, xlator_t *this, int error)
{
- afr_local_t *local = NULL;
- int call_child = (long) cookie;
- int call_count = 0;
-
- local = frame->local;
+ call_frame_t *heal_frame = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ gf_boolean_t start_heal = _gf_false;
+ afr_local_t *heal_local = NULL;
+ unsigned char *success_replies = NULL;
+ int ret = 0;
+
+ if (error != 0) {
+ goto refresh_done;
+ }
+
+ local = frame->local;
+ priv = this->private;
+ success_replies = alloca0(priv->child_count);
+ afr_fill_success_replies(local, priv, success_replies);
+
+ if (priv->thin_arbiter_count && local->is_read_txn &&
+ AFR_COUNT(success_replies, priv->child_count) != priv->child_count) {
+ /* We need to query the good bricks and/or thin-arbiter.*/
+ if (success_replies[0]) {
+ local->read_txn_query_child = AFR_CHILD_ZERO;
+ } else if (success_replies[1]) {
+ local->read_txn_query_child = AFR_CHILD_ONE;
+ }
+ error = EINVAL;
+ goto refresh_done;
+ }
+
+ if (!afr_has_quorum(success_replies, this, frame)) {
+ error = afr_final_errno(frame->local, this->private);
+ if (!error)
+ error = afr_quorum_errno(priv);
+ goto refresh_done;
+ }
+
+ ret = afr_replies_interpret(frame, this, local->refreshinode, &start_heal);
+
+ if (ret && afr_selfheal_enabled(this) && start_heal) {
+ heal_frame = afr_frame_create(this, NULL);
+ if (!heal_frame)
+ goto refresh_done;
+ heal_local = heal_frame->local;
+ heal_local->refreshinode = inode_ref(local->refreshinode);
+ heal_local->heal_frame = heal_frame;
+ if (!afr_throttled_selfheal(heal_frame, this)) {
+ AFR_STACK_DESTROY(heal_frame);
+ goto refresh_done;
+ }
+ }
+
+refresh_done:
+ afr_txn_refresh_done(frame, this, error);
+
+ return 0;
+}
- local->replies[call_child].valid = 1;
- local->replies[call_child].op_ret = op_ret;
- local->replies[call_child].op_errno = op_errno;
- if (op_ret != -1) {
- local->replies[call_child].poststat = *buf;
- local->replies[call_child].postparent = *par;
- local->replies[call_child].xdata = dict_ref (xdata);
- }
+void
+afr_inode_refresh_subvol_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *buf,
+ dict_t *xdata, struct iatt *par)
+{
+ afr_local_t *local = NULL;
+ int call_child = (long)cookie;
+ int8_t need_heal = 1;
+ int call_count = 0;
+ int ret = 0;
+
+ local = frame->local;
+ local->replies[call_child].valid = 1;
+ local->replies[call_child].op_ret = op_ret;
+ local->replies[call_child].op_errno = op_errno;
+ if (op_ret != -1) {
+ local->replies[call_child].poststat = *buf;
+ if (par)
+ local->replies[call_child].postparent = *par;
+ if (xdata)
+ local->replies[call_child].xdata = dict_ref(xdata);
+ }
- call_count = afr_frame_return (frame);
+ if (xdata) {
+ ret = dict_get_int8(xdata, "link-count", &need_heal);
+ if (ret) {
+ gf_msg_debug(this->name, -ret, "Unable to get link count");
+ }
+ }
- if (call_count == 0)
- afr_inode_refresh_done (frame, this);
+ local->replies[call_child].need_heal = need_heal;
+ call_count = afr_frame_return(frame);
+ if (call_count == 0) {
+ afr_set_need_heal(this, local);
+ ret = afr_inode_refresh_err(frame, this);
+ if (ret) {
+ gf_msg_debug(this->name, ret, "afr_inode_refresh_err failed");
+ }
+ afr_inode_refresh_done(frame, this, ret);
+ }
+}
- return 0;
+int
+afr_inode_refresh_subvol_with_lookup_cbk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret,
+ int op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata,
+ struct iatt *par)
+{
+ afr_inode_refresh_subvol_cbk(frame, cookie, this, op_ret, op_errno, buf,
+ xdata, par);
+ return 0;
}
+int
+afr_inode_refresh_subvol_with_lookup(call_frame_t *frame, xlator_t *this, int i,
+ inode_t *inode, uuid_t gfid, dict_t *xdata)
+{
+ loc_t loc = {
+ 0,
+ };
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+
+ loc.inode = inode;
+ if (gf_uuid_is_null(inode->gfid) && gfid) {
+ /* To handle setattr/setxattr on yet to be linked inode from
+ * dht */
+ gf_uuid_copy(loc.gfid, gfid);
+ } else {
+ gf_uuid_copy(loc.gfid, inode->gfid);
+ }
+
+ STACK_WIND_COOKIE(frame, afr_inode_refresh_subvol_with_lookup_cbk,
+ (void *)(long)i, priv->children[i],
+ priv->children[i]->fops->lookup, &loc, xdata);
+ return 0;
+}
int
-afr_inode_refresh_subvol (call_frame_t *frame, xlator_t *this, int i,
- inode_t *inode, dict_t *xdata)
+afr_inode_refresh_subvol_with_fstat_cbk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
{
- loc_t loc = {0, };
- afr_private_t *priv = NULL;
+ afr_inode_refresh_subvol_cbk(frame, cookie, this, op_ret, op_errno, buf,
+ xdata, NULL);
+ return 0;
+}
- priv = this->private;
+int
+afr_inode_refresh_subvol_with_fstat(call_frame_t *frame, xlator_t *this, int i,
+ dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
- loc.inode = inode;
- gf_uuid_copy (loc.gfid, inode->gfid);
+ priv = this->private;
+ local = frame->local;
- STACK_WIND_COOKIE (frame, afr_inode_refresh_subvol_cbk,
- (void *) (long) i, priv->children[i],
- priv->children[i]->fops->lookup, &loc, xdata);
- return 0;
+ STACK_WIND_COOKIE(frame, afr_inode_refresh_subvol_with_fstat_cbk,
+ (void *)(long)i, priv->children[i],
+ priv->children[i]->fops->fstat, local->fd, xdata);
+ return 0;
}
-
int
-afr_inode_refresh_do (call_frame_t *frame, xlator_t *this)
+afr_inode_refresh_do(call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int i = 0;
- dict_t *xdata = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = 0;
+ int i = 0;
+ int ret = 0;
+ dict_t *xdata = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ unsigned char *wind_subvols = NULL;
+
+ priv = this->private;
+ local = frame->local;
+ wind_subvols = alloca0(priv->child_count);
+
+ afr_local_replies_wipe(local, priv);
+
+ if (local->fd) {
+ fd_ctx = afr_fd_ctx_get(local->fd, this);
+ if (!fd_ctx) {
+ afr_inode_refresh_done(frame, this, EINVAL);
+ return 0;
+ }
+ }
+
+ xdata = dict_new();
+ if (!xdata) {
+ afr_inode_refresh_done(frame, this, ENOMEM);
+ return 0;
+ }
- priv = this->private;
- local = frame->local;
+ ret = afr_xattr_req_prepare(this, xdata);
+ if (ret != 0) {
+ dict_unref(xdata);
+ afr_inode_refresh_done(frame, this, -ret);
+ return 0;
+ }
- afr_local_replies_wipe (local, priv);
+ ret = dict_set_sizen_str_sizen(xdata, "link-count", GF_XATTROP_INDEX_COUNT);
+ if (ret) {
+ gf_msg_debug(this->name, -ret, "Unable to set link-count in dict ");
+ }
- xdata = dict_new ();
- if (!xdata) {
- afr_inode_refresh_done (frame, this);
- return 0;
- }
+ ret = dict_set_str_sizen(xdata, GLUSTERFS_INODELK_DOM_COUNT, this->name);
+ if (ret) {
+ gf_msg_debug(this->name, -ret,
+ "Unable to set inodelk-dom-count in dict ");
+ }
- if (afr_xattr_req_prepare (this, xdata) != 0) {
- dict_unref (xdata);
- afr_inode_refresh_done (frame, this);
- return 0;
- }
+ if (local->fd) {
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i] && fd_ctx->opened_on[i] == AFR_FD_OPENED)
+ wind_subvols[i] = 1;
+ }
+ } else {
+ memcpy(wind_subvols, local->child_up,
+ sizeof(*local->child_up) * priv->child_count);
+ }
- local->call_count = AFR_COUNT (local->child_up, priv->child_count);
+ local->call_count = AFR_COUNT(wind_subvols, priv->child_count);
- call_count = local->call_count;
- for (i = 0; i < priv->child_count; i++) {
- if (!local->child_up[i])
- continue;
+ call_count = local->call_count;
+ if (!call_count) {
+ dict_unref(xdata);
+ if (local->fd && AFR_COUNT(local->child_up, priv->child_count))
+ afr_inode_refresh_done(frame, this, EBADFD);
+ else
+ afr_inode_refresh_done(frame, this, ENOTCONN);
+ return 0;
+ }
+ for (i = 0; i < priv->child_count; i++) {
+ if (!wind_subvols[i])
+ continue;
- afr_inode_refresh_subvol (frame, this, i, local->refreshinode,
- xdata);
+ if (local->fd)
+ afr_inode_refresh_subvol_with_fstat(frame, this, i, xdata);
+ else
+ afr_inode_refresh_subvol_with_lookup(
+ frame, this, i, local->refreshinode, local->refreshgfid, xdata);
- if (!--call_count)
- break;
- }
+ if (!--call_count)
+ break;
+ }
- dict_unref (xdata);
+ dict_unref(xdata);
- return 0;
+ return 0;
}
-
int
-afr_inode_refresh (call_frame_t *frame, xlator_t *this, inode_t *inode,
- afr_inode_refresh_cbk_t refreshfn)
+afr_inode_refresh(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ uuid_t gfid, afr_inode_refresh_cbk_t refreshfn)
{
- afr_local_t *local = NULL;
+ afr_local_t *local = NULL;
- local = frame->local;
+ local = frame->local;
- local->refreshfn = refreshfn;
+ local->refreshfn = refreshfn;
- if (local->refreshinode) {
- inode_unref (local->refreshinode);
- local->refreshinode = NULL;
- }
+ if (local->refreshinode) {
+ inode_unref(local->refreshinode);
+ local->refreshinode = NULL;
+ }
- local->refreshinode = inode_ref (inode);
+ local->refreshinode = inode_ref(inode);
- afr_inode_refresh_do (frame, this);
+ if (gfid)
+ gf_uuid_copy(local->refreshgfid, gfid);
+ else
+ gf_uuid_clear(local->refreshgfid);
- return 0;
-}
+ afr_inode_refresh_do(frame, this);
+ return 0;
+}
int
-afr_xattr_req_prepare (xlator_t *this, dict_t *xattr_req)
+afr_xattr_req_prepare(xlator_t *this, dict_t *xattr_req)
{
- int i = 0;
- afr_private_t *priv = NULL;
- int ret = 0;
-
- priv = this->private;
+ int i = 0;
+ afr_private_t *priv = NULL;
+ int ret = 0;
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_set_uint64 (xattr_req, priv->pending_key[i],
- AFR_NUM_CHANGE_LOGS * sizeof(int));
- if (ret < 0)
- gf_log (this->name, GF_LOG_WARNING,
- "Unable to set dict value for %s",
- priv->pending_key[i]);
- /* 3 = data+metadata+entry */
- }
- ret = dict_set_uint64 (xattr_req, AFR_DIRTY,
- AFR_NUM_CHANGE_LOGS * sizeof(int));
- if (ret) {
- gf_log (this->name, GF_LOG_DEBUG, "failed to set dirty "
- "query flag");
- }
+ priv = this->private;
- ret = dict_set_int32 (xattr_req, "list-xattr", 1);
- if (ret) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Unable to set list-xattr in dict ");
- }
-
- return ret;
+ for (i = 0; i < priv->child_count; i++) {
+ ret = dict_set_uint64(xattr_req, priv->pending_key[i],
+ AFR_NUM_CHANGE_LOGS * sizeof(int));
+ if (ret < 0)
+ gf_msg(this->name, GF_LOG_WARNING, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Unable to set dict value for %s", priv->pending_key[i]);
+ /* 3 = data+metadata+entry */
+ }
+ ret = dict_set_uint64(xattr_req, AFR_DIRTY,
+ AFR_NUM_CHANGE_LOGS * sizeof(int));
+ if (ret) {
+ gf_msg_debug(this->name, -ret,
+ "failed to set dirty "
+ "query flag");
+ }
+
+ ret = dict_set_int32_sizen(xattr_req, "list-xattr", 1);
+ if (ret) {
+ gf_msg_debug(this->name, -ret, "Unable to set list-xattr in dict ");
+ }
+
+ return ret;
}
int
-afr_lookup_xattr_req_prepare (afr_local_t *local, xlator_t *this,
- dict_t *xattr_req, loc_t *loc)
+afr_lookup_xattr_req_prepare(afr_local_t *local, xlator_t *this,
+ dict_t *xattr_req, loc_t *loc)
{
- int ret = -ENOMEM;
-
- if (!local->xattr_req)
- local->xattr_req = dict_new ();
-
- if (!local->xattr_req)
- goto out;
+ int ret = -ENOMEM;
+
+ if (!local->xattr_req)
+ local->xattr_req = dict_new();
+
+ if (!local->xattr_req)
+ goto out;
+
+ if (xattr_req && (xattr_req != local->xattr_req))
+ dict_copy(xattr_req, local->xattr_req);
+
+ ret = afr_xattr_req_prepare(this, local->xattr_req);
+
+ ret = dict_set_uint64(local->xattr_req, GLUSTERFS_INODELK_COUNT, 0);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, AFR_MSG_DICT_SET_FAILED,
+ "%s: Unable to set dict value for %s", loc->path,
+ GLUSTERFS_INODELK_COUNT);
+ }
+ ret = dict_set_uint64(local->xattr_req, GLUSTERFS_ENTRYLK_COUNT, 0);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, AFR_MSG_DICT_SET_FAILED,
+ "%s: Unable to set dict value for %s", loc->path,
+ GLUSTERFS_ENTRYLK_COUNT);
+ }
+
+ ret = dict_set_uint32(local->xattr_req, GLUSTERFS_PARENT_ENTRYLK, 0);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, AFR_MSG_DICT_SET_FAILED,
+ "%s: Unable to set dict value for %s", loc->path,
+ GLUSTERFS_PARENT_ENTRYLK);
+ }
+
+ ret = dict_set_sizen_str_sizen(local->xattr_req, "link-count",
+ GF_XATTROP_INDEX_COUNT);
+ if (ret) {
+ gf_msg_debug(this->name, -ret, "Unable to set link-count in dict ");
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
- if (xattr_req != local->xattr_req)
- dict_copy (xattr_req, local->xattr_req);
+int
+afr_least_pending_reads_child(afr_private_t *priv, unsigned char *readable)
+{
+ int i = 0;
+ int child = -1;
+ int64_t read_iter = -1;
+ int64_t pending_read = -1;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (AFR_IS_ARBITER_BRICK(priv, i) || !readable[i])
+ continue;
+ read_iter = GF_ATOMIC_GET(priv->pending_reads[i]);
+ if (child == -1 || read_iter < pending_read) {
+ pending_read = read_iter;
+ child = i;
+ }
+ }
+
+ return child;
+}
- ret = afr_xattr_req_prepare (this, local->xattr_req);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: Unable to prepare xattr_req", loc->path);
- }
+static int32_t
+afr_least_latency_child(afr_private_t *priv, unsigned char *readable)
+{
+ int32_t i = 0;
+ int child = -1;
- ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_INODELK_COUNT, 0);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: Unable to set dict value for %s",
- loc->path, GLUSTERFS_INODELK_COUNT);
- }
- ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_ENTRYLK_COUNT, 0);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: Unable to set dict value for %s",
- loc->path, GLUSTERFS_ENTRYLK_COUNT);
- }
+ for (i = 0; i < priv->child_count; i++) {
+ if (AFR_IS_ARBITER_BRICK(priv, i) || !readable[i] ||
+ priv->child_latency[i] < 0)
+ continue;
- ret = dict_set_uint32 (local->xattr_req, GLUSTERFS_PARENT_ENTRYLK, 0);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: Unable to set dict value for %s",
- loc->path, GLUSTERFS_PARENT_ENTRYLK);
+ if (child == -1 ||
+ priv->child_latency[i] < priv->child_latency[child]) {
+ child = i;
}
-
- ret = 0;
-out:
- return ret;
+ }
+ return child;
}
+static int32_t
+afr_least_latency_times_pending_reads_child(afr_private_t *priv,
+ unsigned char *readable)
+{
+ int32_t i = 0;
+ int child = -1;
+ int64_t pending_read = 0;
+ int64_t latency = -1;
+ int64_t least_latency = -1;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (AFR_IS_ARBITER_BRICK(priv, i) || !readable[i] ||
+ priv->child_latency[i] < 0)
+ continue;
+
+ pending_read = GF_ATOMIC_GET(priv->pending_reads[i]);
+ latency = (pending_read + 1) * priv->child_latency[i];
+
+ if (child == -1 || latency < least_latency) {
+ least_latency = latency;
+ child = i;
+ }
+ }
+ return child;
+}
int
-afr_hash_child (inode_t *inode, int32_t child_count, int hashmode)
+afr_hash_child(afr_read_subvol_args_t *args, afr_private_t *priv,
+ unsigned char *readable)
{
- uuid_t gfid_copy = {0,};
- pid_t pid;
-
- if (!hashmode) {
- return -1;
- }
-
- if (inode) {
- gf_uuid_copy (gfid_copy, inode->gfid);
- }
-
- if (hashmode > 1 && inode->ia_type != IA_IFDIR) {
+ uuid_t gfid_copy = {
+ 0,
+ };
+ pid_t pid;
+ int child = -1;
+
+ switch (priv->hash_mode) {
+ case AFR_READ_POLICY_FIRST_UP:
+ break;
+ case AFR_READ_POLICY_GFID_HASH:
+ gf_uuid_copy(gfid_copy, args->gfid);
+ child = SuperFastHash((char *)gfid_copy, sizeof(gfid_copy)) %
+ priv->child_count;
+ break;
+ case AFR_READ_POLICY_GFID_PID_HASH:
+ if (args->ia_type != IA_IFDIR) {
/*
* Why getpid? Because it's one of the cheapest calls
- * available - faster than gethostname etc. - and returns a
- * constant-length value that's sure to be shorter than a UUID.
- * It's still very unlikely to be the same across clients, so
- * it still provides good mixing. We're not trying for
- * perfection here. All we need is a low probability that
- * multiple clients won't converge on the same subvolume.
+ * available - faster than gethostname etc. - and
+ * returns a constant-length value that's sure to be
+ * shorter than a UUID. It's still very unlikely to be
+ * the same across clients, so it still provides good
+ * mixing. We're not trying for perfection here. All we
+ * need is a low probability that multiple clients
+ * won't converge on the same subvolume.
*/
+ gf_uuid_copy(gfid_copy, args->gfid);
pid = getpid();
- memcpy (gfid_copy, &pid, sizeof(pid));
- }
-
- return SuperFastHash((char *)gfid_copy,
- sizeof(gfid_copy)) % child_count;
+ *(pid_t *)gfid_copy ^= pid;
+ }
+ child = SuperFastHash((char *)gfid_copy, sizeof(gfid_copy)) %
+ priv->child_count;
+ break;
+ case AFR_READ_POLICY_LESS_LOAD:
+ child = afr_least_pending_reads_child(priv, readable);
+ break;
+ case AFR_READ_POLICY_LEAST_LATENCY:
+ child = afr_least_latency_child(priv, readable);
+ break;
+ case AFR_READ_POLICY_LOAD_LATENCY_HYBRID:
+ child = afr_least_latency_times_pending_reads_child(priv, readable);
+ break;
+ }
+
+ return child;
}
-
int
-afr_read_subvol_select_by_policy (inode_t *inode, xlator_t *this,
- unsigned char *readable)
+afr_read_subvol_select_by_policy(inode_t *inode, xlator_t *this,
+ unsigned char *readable,
+ afr_read_subvol_args_t *args)
{
- afr_private_t *priv = NULL;
- int read_subvol = -1;
- int i = 0;
-
- priv = this->private;
-
- /* first preference - explicitly specified or local subvolume */
- if (priv->read_child >= 0 && readable[priv->read_child])
- return priv->read_child;
-
- /* second preference - use hashed mode */
- read_subvol = afr_hash_child (inode, priv->child_count,
- priv->hash_mode);
- if (read_subvol >= 0 && readable[read_subvol])
- return read_subvol;
-
- for (i = 0; i < priv->child_count; i++) {
- if (readable[i])
- return i;
- }
-
- /* no readable subvolumes, either split brain or all subvols down */
-
- return -1;
+ int i = 0;
+ int read_subvol = -1;
+ afr_private_t *priv = NULL;
+ afr_read_subvol_args_t local_args = {
+ 0,
+ };
+
+ priv = this->private;
+
+ /* first preference - explicitly specified or local subvolume */
+ if (priv->read_child >= 0 && readable[priv->read_child])
+ return priv->read_child;
+
+ if (inode_is_linked(inode)) {
+ gf_uuid_copy(local_args.gfid, inode->gfid);
+ local_args.ia_type = inode->ia_type;
+ } else if (args) {
+ local_args = *args;
+ }
+
+ /* second preference - use hashed mode */
+ read_subvol = afr_hash_child(&local_args, priv, readable);
+ if (read_subvol >= 0 && readable[read_subvol])
+ return read_subvol;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (readable[i])
+ return i;
+ }
+
+ /* no readable subvolumes, either split brain or all subvols down */
+
+ return -1;
}
-
int
-afr_inode_read_subvol_type_get (inode_t *inode, xlator_t *this,
- unsigned char *readable, int *event_p,
- int type)
+afr_inode_read_subvol_type_get(inode_t *inode, xlator_t *this,
+ unsigned char *readable, int *event_p, int type)
{
- int ret = -1;
+ int ret = -1;
- if (type == AFR_METADATA_TRANSACTION)
- ret = afr_inode_read_subvol_get (inode, this, 0, readable,
- event_p);
- else
- ret = afr_inode_read_subvol_get (inode, this, readable, 0,
- event_p);
- return ret;
+ if (type == AFR_METADATA_TRANSACTION)
+ ret = afr_inode_read_subvol_get(inode, this, 0, readable, event_p);
+ else
+ ret = afr_inode_read_subvol_get(inode, this, readable, 0, event_p);
+ return ret;
}
+void
+afr_readables_intersect_get(inode_t *inode, xlator_t *this, int *event,
+ unsigned char *intersection)
+{
+ afr_private_t *priv = NULL;
+ unsigned char *data_readable = NULL;
+ unsigned char *metadata_readable = NULL;
+ unsigned char *intersect = NULL;
+
+ priv = this->private;
+ data_readable = alloca0(priv->child_count);
+ metadata_readable = alloca0(priv->child_count);
+ intersect = alloca0(priv->child_count);
+
+ afr_inode_read_subvol_get(inode, this, data_readable, metadata_readable,
+ event);
+
+ AFR_INTERSECT(intersect, data_readable, metadata_readable,
+ priv->child_count);
+ if (intersection)
+ memcpy(intersection, intersect,
+ sizeof(*intersection) * priv->child_count);
+}
int
-afr_read_subvol_get (inode_t *inode, xlator_t *this, int *subvol_p,
- int *event_p, afr_transaction_type type)
+afr_read_subvol_get(inode_t *inode, xlator_t *this, int *subvol_p,
+ unsigned char *readables, int *event_p,
+ afr_transaction_type type, afr_read_subvol_args_t *args)
{
- afr_private_t *priv = NULL;
- unsigned char *data_readable = NULL;
- unsigned char *metadata_readable = NULL;
- unsigned char *readable = NULL;
- unsigned char *intersection = NULL;
- int subvol = -1;
- int event = 0;
-
- priv = this->private;
-
- readable = alloca0 (priv->child_count);
- data_readable = alloca0 (priv->child_count);
- metadata_readable = alloca0 (priv->child_count);
- intersection = alloca0 (priv->child_count);
-
- afr_inode_read_subvol_type_get (inode, this, readable, &event, type);
-
- afr_inode_read_subvol_get (inode, this, data_readable, metadata_readable,
- &event);
-
- AFR_INTERSECT (intersection, data_readable, metadata_readable,
- priv->child_count);
-
- if (AFR_COUNT (intersection, priv->child_count) > 0)
- subvol = afr_read_subvol_select_by_policy (inode, this,
- intersection);
- else
- subvol = afr_read_subvol_select_by_policy (inode, this,
- readable);
- if (subvol_p)
- *subvol_p = subvol;
- if (event_p)
- *event_p = event;
- return subvol;
+ afr_private_t *priv = NULL;
+ unsigned char *readable = NULL;
+ unsigned char *intersection = NULL;
+ int subvol = -1;
+ int event = 0;
+
+ priv = this->private;
+
+ readable = alloca0(priv->child_count);
+ intersection = alloca0(priv->child_count);
+
+ afr_inode_read_subvol_type_get(inode, this, readable, &event, type);
+
+ afr_readables_intersect_get(inode, this, &event, intersection);
+
+ if (AFR_COUNT(intersection, priv->child_count) > 0)
+ subvol = afr_read_subvol_select_by_policy(inode, this, intersection,
+ args);
+ else
+ subvol = afr_read_subvol_select_by_policy(inode, this, readable, args);
+ if (subvol_p)
+ *subvol_p = subvol;
+ if (event_p)
+ *event_p = event;
+ if (readables)
+ memcpy(readables, readable, sizeof(*readables) * priv->child_count);
+ return subvol;
}
-
void
-afr_local_transaction_cleanup (afr_local_t *local, xlator_t *this)
+afr_local_transaction_cleanup(afr_local_t *local, xlator_t *this)
{
- afr_private_t *priv = NULL;
- int i = 0;
+ afr_private_t *priv = NULL;
+ int i = 0;
- priv = this->private;
+ priv = this->private;
- afr_matrix_cleanup (local->pending, priv->child_count);
+ afr_matrix_cleanup(local->pending, priv->child_count);
- GF_FREE (local->internal_lock.locked_nodes);
+ GF_FREE(local->internal_lock.lower_locked_nodes);
- for (i = 0; local->internal_lock.inodelk[i].domain; i++) {
- GF_FREE (local->internal_lock.inodelk[i].locked_nodes);
- }
+ afr_lockees_cleanup(&local->internal_lock);
- GF_FREE (local->internal_lock.lower_locked_nodes);
+ GF_FREE(local->transaction.pre_op);
- afr_entry_lockee_cleanup (&local->internal_lock);
-
- GF_FREE (local->transaction.pre_op);
- GF_FREE (local->transaction.eager_lock);
- GF_FREE (local->transaction.fop_subvols);
- GF_FREE (local->transaction.failed_subvols);
+ GF_FREE(local->transaction.pre_op_sources);
+ if (local->transaction.changelog_xdata) {
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->transaction.changelog_xdata[i])
+ continue;
+ dict_unref(local->transaction.changelog_xdata[i]);
+ }
+ GF_FREE(local->transaction.changelog_xdata);
+ }
- GF_FREE (local->transaction.basename);
- GF_FREE (local->transaction.new_basename);
+ GF_FREE(local->transaction.failed_subvols);
- loc_wipe (&local->transaction.parent_loc);
- loc_wipe (&local->transaction.new_parent_loc);
+ GF_FREE(local->transaction.basename);
+ GF_FREE(local->transaction.new_basename);
+ loc_wipe(&local->transaction.parent_loc);
+ loc_wipe(&local->transaction.new_parent_loc);
}
+void
+afr_reply_wipe(struct afr_reply *reply)
+{
+ if (reply->xdata) {
+ dict_unref(reply->xdata);
+ reply->xdata = NULL;
+ }
+
+ if (reply->xattr) {
+ dict_unref(reply->xattr);
+ reply->xattr = NULL;
+ }
+}
void
-afr_replies_wipe (struct afr_reply *replies, int count)
+afr_replies_wipe(struct afr_reply *replies, int count)
{
- int i = 0;
+ int i = 0;
- for (i = 0; i < count; i++) {
- if (replies[i].xdata) {
- dict_unref (replies[i].xdata);
- replies[i].xdata = NULL;
- }
- }
+ for (i = 0; i < count; i++) {
+ afr_reply_wipe(&replies[i]);
+ }
}
void
-afr_local_replies_wipe (afr_local_t *local, afr_private_t *priv)
+afr_local_replies_wipe(afr_local_t *local, afr_private_t *priv)
{
+ if (!local->replies)
+ return;
- if (!local->replies)
- return;
-
- afr_replies_wipe (local->replies, priv->child_count);
+ afr_replies_wipe(local->replies, priv->child_count);
- memset (local->replies, 0, sizeof(*local->replies) * priv->child_count);
+ memset(local->replies, 0, sizeof(*local->replies) * priv->child_count);
}
-void
-afr_remove_eager_lock_stub (afr_local_t *local)
+static gf_boolean_t
+afr_fop_lock_is_unlock(call_frame_t *frame)
{
- LOCK (&local->fd->lock);
- {
- list_del_init (&local->transaction.eager_locked);
- }
- UNLOCK (&local->fd->lock);
+ afr_local_t *local = frame->local;
+ switch (local->op) {
+ case GF_FOP_INODELK:
+ case GF_FOP_FINODELK:
+ if ((F_UNLCK == local->cont.inodelk.in_flock.l_type) &&
+ (local->cont.inodelk.in_cmd == F_SETLKW ||
+ local->cont.inodelk.in_cmd == F_SETLK))
+ return _gf_true;
+ break;
+ case GF_FOP_ENTRYLK:
+ case GF_FOP_FENTRYLK:
+ if (ENTRYLK_UNLOCK == local->cont.entrylk.in_cmd)
+ return _gf_true;
+ break;
+ default:
+ return _gf_false;
+ }
+ return _gf_false;
}
-void
-afr_local_cleanup (afr_local_t *local, xlator_t *this)
+static gf_boolean_t
+afr_lk_is_unlock(int32_t cmd, struct gf_flock *flock)
{
- afr_private_t * priv = NULL;
-
- if (!local)
- return;
-
- syncbarrier_destroy (&local->barrier);
+ switch (cmd) {
+ case F_RESLK_UNLCK:
+ return _gf_true;
+ break;
- if (local->transaction.eager_lock_on &&
- !list_empty (&local->transaction.eager_locked))
- afr_remove_eager_lock_stub (local);
+#if F_SETLKW != F_SETLKW64
+ case F_SETLKW64:
+#endif
+ case F_SETLKW:
- afr_local_transaction_cleanup (local, this);
+#if F_SETLK != F_SETLK64
+ case F_SETLK64:
+#endif
+ case F_SETLK:
+ if (F_UNLCK == flock->l_type)
+ return _gf_true;
+ break;
+ default:
+ return _gf_false;
+ }
+ return _gf_false;
+}
- priv = this->private;
+void
+afr_handle_inconsistent_fop(call_frame_t *frame, int32_t *op_ret,
+ int32_t *op_errno)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
- loc_wipe (&local->loc);
- loc_wipe (&local->newloc);
+ if (!frame || !frame->this || !frame->local || !frame->this->private)
+ return;
- if (local->fd)
- fd_unref (local->fd);
+ if (*op_ret < 0)
+ return;
- if (local->xattr_req)
- dict_unref (local->xattr_req);
+ /* Failing inodelk/entrylk/lk here is not a good idea because we
+ * need to cleanup the locks on the other bricks if we choose to fail
+ * the fop here. The brick may go down just after unwind happens as well
+ * so anyways the fop will fail when the next fop is sent so leaving
+ * it like this for now.*/
+ local = frame->local;
+ switch (local->op) {
+ case GF_FOP_LOOKUP:
+ case GF_FOP_INODELK:
+ case GF_FOP_FINODELK:
+ case GF_FOP_ENTRYLK:
+ case GF_FOP_FENTRYLK:
+ case GF_FOP_LK:
+ return;
+ default:
+ break;
+ }
- if (local->dict)
- dict_unref (local->dict);
+ priv = frame->this->private;
+ if (!priv->consistent_io)
+ return;
- afr_local_replies_wipe (local, priv);
- GF_FREE(local->replies);
+ if (local->event_generation &&
+ (local->event_generation != priv->event_generation))
+ goto inconsistent;
- GF_FREE (local->child_up);
+ return;
+inconsistent:
+ *op_ret = -1;
+ *op_errno = ENOTCONN;
+}
- GF_FREE (local->read_attempted);
+void
+afr_local_cleanup(afr_local_t *local, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
- GF_FREE (local->readable);
+ if (!local)
+ return;
- if (local->inode)
- inode_unref (local->inode);
+ syncbarrier_destroy(&local->barrier);
- if (local->parent)
- inode_unref (local->parent);
+ afr_local_transaction_cleanup(local, this);
- if (local->parent2)
- inode_unref (local->parent2);
+ priv = this->private;
- if (local->refreshinode)
- inode_unref (local->refreshinode);
+ loc_wipe(&local->loc);
+ loc_wipe(&local->newloc);
- { /* getxattr */
- GF_FREE (local->cont.getxattr.name);
- }
+ if (local->fd)
+ fd_unref(local->fd);
- { /* lk */
- GF_FREE (local->cont.lk.locked_nodes);
- }
+ if (local->xattr_req)
+ dict_unref(local->xattr_req);
- { /* create */
- if (local->cont.create.fd)
- fd_unref (local->cont.create.fd);
- if (local->cont.create.params)
- dict_unref (local->cont.create.params);
- }
+ if (local->xattr_rsp)
+ dict_unref(local->xattr_rsp);
- { /* mknod */
- if (local->cont.mknod.params)
- dict_unref (local->cont.mknod.params);
- }
+ if (local->dict)
+ dict_unref(local->dict);
- { /* mkdir */
- if (local->cont.mkdir.params)
- dict_unref (local->cont.mkdir.params);
- }
+ afr_local_replies_wipe(local, priv);
+ GF_FREE(local->replies);
- { /* symlink */
- if (local->cont.symlink.params)
- dict_unref (local->cont.symlink.params);
- }
+ GF_FREE(local->child_up);
- { /* writev */
- GF_FREE (local->cont.writev.vector);
- if (local->cont.writev.iobref)
- iobref_unref (local->cont.writev.iobref);
- }
+ GF_FREE(local->read_attempted);
+
+ GF_FREE(local->readable);
+ GF_FREE(local->readable2);
+
+ if (local->inode)
+ inode_unref(local->inode);
- { /* setxattr */
- if (local->cont.setxattr.dict)
- dict_unref (local->cont.setxattr.dict);
- }
+ if (local->parent)
+ inode_unref(local->parent);
+
+ if (local->parent2)
+ inode_unref(local->parent2);
+
+ if (local->refreshinode)
+ inode_unref(local->refreshinode);
+
+ { /* getxattr */
+ GF_FREE(local->cont.getxattr.name);
+ }
+
+ { /* lk */
+ GF_FREE(local->cont.lk.locked_nodes);
+ GF_FREE(local->cont.lk.dom_locked_nodes);
+ GF_FREE(local->cont.lk.dom_lock_op_ret);
+ GF_FREE(local->cont.lk.dom_lock_op_errno);
+ }
+
+ { /* create */
+ if (local->cont.create.fd)
+ fd_unref(local->cont.create.fd);
+ if (local->cont.create.params)
+ dict_unref(local->cont.create.params);
+ }
+
+ { /* mknod */
+ if (local->cont.mknod.params)
+ dict_unref(local->cont.mknod.params);
+ }
+
+ { /* mkdir */
+ if (local->cont.mkdir.params)
+ dict_unref(local->cont.mkdir.params);
+ }
+
+ { /* symlink */
+ if (local->cont.symlink.params)
+ dict_unref(local->cont.symlink.params);
+ }
+
+ { /* writev */
+ GF_FREE(local->cont.writev.vector);
+ if (local->cont.writev.iobref)
+ iobref_unref(local->cont.writev.iobref);
+ }
+
+ { /* setxattr */
+ if (local->cont.setxattr.dict)
+ dict_unref(local->cont.setxattr.dict);
+ }
+
+ { /* fsetxattr */
+ if (local->cont.fsetxattr.dict)
+ dict_unref(local->cont.fsetxattr.dict);
+ }
+
+ { /* removexattr */
+ GF_FREE(local->cont.removexattr.name);
+ }
+ { /* xattrop */
+ if (local->cont.xattrop.xattr)
+ dict_unref(local->cont.xattrop.xattr);
+ }
+ { /* symlink */
+ GF_FREE(local->cont.symlink.linkpath);
+ }
+
+ { /* opendir */
+ GF_FREE(local->cont.opendir.checksum);
+ }
+
+ { /* open */
+ if (local->cont.open.fd)
+ fd_unref(local->cont.open.fd);
+ }
+
+ { /* readdirp */
+ if (local->cont.readdir.dict)
+ dict_unref(local->cont.readdir.dict);
+ }
+
+ { /* inodelk */
+ GF_FREE(local->cont.inodelk.volume);
+ if (local->cont.inodelk.xdata)
+ dict_unref(local->cont.inodelk.xdata);
+ }
+
+ { /* entrylk */
+ GF_FREE(local->cont.entrylk.volume);
+ GF_FREE(local->cont.entrylk.basename);
+ if (local->cont.entrylk.xdata)
+ dict_unref(local->cont.entrylk.xdata);
+ }
+
+ if (local->xdata_req)
+ dict_unref(local->xdata_req);
+
+ if (local->xdata_rsp)
+ dict_unref(local->xdata_rsp);
+}
- { /* fsetxattr */
- if (local->cont.fsetxattr.dict)
- dict_unref (local->cont.fsetxattr.dict);
- }
+int
+afr_frame_return(call_frame_t *frame)
+{
+ afr_local_t *local = NULL;
+ int call_count = 0;
- { /* removexattr */
- GF_FREE (local->cont.removexattr.name);
- }
- { /* xattrop */
- if (local->cont.xattrop.xattr)
- dict_unref (local->cont.xattrop.xattr);
- }
- { /* fxattrop */
- if (local->cont.fxattrop.xattr)
- dict_unref (local->cont.fxattrop.xattr);
- }
- { /* symlink */
- GF_FREE (local->cont.symlink.linkpath);
- }
+ local = frame->local;
- { /* opendir */
- GF_FREE (local->cont.opendir.checksum);
- }
+ LOCK(&frame->lock);
+ {
+ call_count = --local->call_count;
+ }
+ UNLOCK(&frame->lock);
- { /* readdirp */
- if (local->cont.readdir.dict)
- dict_unref (local->cont.readdir.dict);
- }
+ return call_count;
+}
- { /* inodelk */
- GF_FREE (local->cont.inodelk.volume);
- }
+static char *afr_ignore_xattrs[] = {GF_SELINUX_XATTR_KEY, QUOTA_SIZE_KEY, NULL};
- if (local->xdata_req)
- dict_unref (local->xdata_req);
+gf_boolean_t
+afr_is_xattr_ignorable(char *key)
+{
+ int i = 0;
- if (local->xdata_rsp)
- dict_unref (local->xdata_rsp);
+ if (!strncmp(key, AFR_XATTR_PREFIX, SLEN(AFR_XATTR_PREFIX)))
+ return _gf_true;
+ for (i = 0; afr_ignore_xattrs[i]; i++) {
+ if (!strcmp(key, afr_ignore_xattrs[i]))
+ return _gf_true;
+ }
+ return _gf_false;
}
-
-int
-afr_frame_return (call_frame_t *frame)
+static gf_boolean_t
+afr_xattr_match_needed(dict_t *this, char *key1, data_t *value1, void *data)
{
- afr_local_t *local = NULL;
- int call_count = 0;
-
- local = frame->local;
+ /* Ignore all non-disk (i.e. virtual) xattrs right away. */
+ if (!gf_is_valid_xattr_namespace(key1))
+ return _gf_false;
- LOCK (&frame->lock);
- {
- call_count = --local->call_count;
- }
- UNLOCK (&frame->lock);
+ /* Ignore on-disk xattrs that AFR doesn't need to heal. */
+ if (!afr_is_xattr_ignorable(key1))
+ return _gf_true;
- return call_count;
+ return _gf_false;
}
-
gf_boolean_t
-afr_is_entry_possibly_under_txn (afr_local_t *local, xlator_t *this)
+afr_xattrs_are_equal(dict_t *dict1, dict_t *dict2)
{
- int i = 0;
- int tmp = 0;
- afr_private_t *priv = NULL;
-
- priv = this->private;
+ return are_dicts_equal(dict1, dict2, afr_xattr_match_needed, NULL);
+}
- for (i = 0; i < priv->child_count; i++) {
- if (!local->replies[i].xdata)
- continue;
- if (dict_get_int32 (local->replies[i].xdata,
- GLUSTERFS_PARENT_ENTRYLK,
- &tmp) == 0)
- if (tmp)
- return _gf_true;
- }
+static int
+afr_get_parent_read_subvol(xlator_t *this, inode_t *parent,
+ struct afr_reply *replies, unsigned char *readable)
+{
+ int i = 0;
+ int par_read_subvol = -1;
+ int par_read_subvol_iter = -1;
+ afr_private_t *priv = NULL;
- return _gf_false;
-}
+ priv = this->private;
+ if (parent)
+ par_read_subvol = afr_data_subvol_get(parent, this, NULL, NULL, NULL,
+ NULL);
-static char *afr_ignore_xattrs[] = {
- GLUSTERFS_OPEN_FD_COUNT,
- GLUSTERFS_PARENT_ENTRYLK,
- GLUSTERFS_ENTRYLK_COUNT,
- GLUSTERFS_INODELK_COUNT,
- GF_SELINUX_XATTR_KEY,
- QUOTA_SIZE_KEY,
- NULL
-};
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
-gf_boolean_t
-afr_is_xattr_ignorable (char *key)
-{
- int i = 0;
+ if (replies[i].op_ret < 0)
+ continue;
- if (!strncmp (key, AFR_XATTR_PREFIX, strlen(AFR_XATTR_PREFIX)))
- return _gf_true;
- for (i = 0; afr_ignore_xattrs[i]; i++) {
- if (!strcmp (key, afr_ignore_xattrs[i]))
- return _gf_true;
+ if (par_read_subvol_iter == -1) {
+ par_read_subvol_iter = i;
+ continue;
}
- return _gf_false;
+
+ if ((par_read_subvol_iter != par_read_subvol) && readable[i])
+ par_read_subvol_iter = i;
+
+ if (i == par_read_subvol)
+ par_read_subvol_iter = i;
+ }
+ /* At the end of the for-loop, the only reason why @par_read_subvol_iter
+ * could be -1 is when this LOOKUP has failed on all sub-volumes.
+ * So it is okay to send an arbitrary subvolume (0 in this case)
+ * as parent read subvol.
+ */
+ if (par_read_subvol_iter == -1)
+ par_read_subvol_iter = 0;
+
+ return par_read_subvol_iter;
}
int
-xattr_is_equal (dict_t *this, char *key1, data_t *value1, void *data)
+afr_read_subvol_decide(inode_t *inode, xlator_t *this,
+ afr_read_subvol_args_t *args, unsigned char *readable)
{
- dict_t *xattr2 = (dict_t *)data;
- data_t *value2 = NULL;
+ int event = 0;
+ afr_private_t *priv = NULL;
+ unsigned char *intersection = NULL;
- if (afr_is_xattr_ignorable (key1))
- return 0;
+ priv = this->private;
+ intersection = alloca0(priv->child_count);
- value2 = dict_get (xattr2, key1);
- if (!value2)
- return -1;
+ afr_readables_intersect_get(inode, this, &event, intersection);
- if (value1->len != value2->len)
- return -1;
- if(memcmp(value1->data, value2->data, value1->len))
- return -1;
- else
- return 0;
+ if (AFR_COUNT(intersection, priv->child_count) <= 0) {
+ /* TODO: If we have one brick with valid data_readable and
+ * another with metadata_readable, try to send an iatt with
+ * valid bits from both.*/
+ return -1;
+ }
+ memcpy(readable, intersection, sizeof(*readable) * priv->child_count);
+
+ return afr_read_subvol_select_by_policy(inode, this, intersection, args);
}
-/* To conclude that both dicts are equal, we need to check if
- * 1) For every key-val pair in dict1, a match is present in dict2
- * 2) For every key-val pair in dict2, a match is present in dict1
- * We need to do both because ignoring glusterfs' internal xattrs
- * happens only in xattr_is_equal().
- */
-gf_boolean_t
-afr_xattrs_are_equal (dict_t *dict1, dict_t *dict2)
+static inline int
+afr_first_up_child(call_frame_t *frame, xlator_t *this)
{
- int ret = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int i = 0;
- ret = dict_foreach (dict1, xattr_is_equal, dict2);
- if (ret == -1)
- return _gf_false;
+ local = frame->local;
+ priv = this->private;
- ret = dict_foreach (dict2, xattr_is_equal, dict1);
- if (ret == -1)
- return _gf_false;
-
- return _gf_true;
+ for (i = 0; i < priv->child_count; i++)
+ if (local->replies[i].valid && local->replies[i].op_ret == 0)
+ return i;
+ return -1;
}
-static int
-afr_get_parent_read_subvol (xlator_t *this, inode_t *parent,
- struct afr_reply *replies, unsigned char *readable)
+static void
+afr_attempt_readsubvol_set(call_frame_t *frame, xlator_t *this,
+ unsigned char *success_replies,
+ unsigned char *data_readable, int *read_subvol)
{
- int i = 0;
- int par_read_subvol = -1;
- int par_read_subvol_iter = -1;
- afr_private_t *priv = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int spb_subvol = -1;
+ int child_count = -1;
- priv = this->private;
+ if (*read_subvol != -1)
+ return;
- if (parent)
- par_read_subvol = afr_data_subvol_get (parent, this, 0, 0);
+ priv = this->private;
+ local = frame->local;
+ child_count = priv->child_count;
+
+ afr_split_brain_read_subvol_get(local->inode, this, frame, &spb_subvol);
+ if ((spb_subvol >= 0) &&
+ (AFR_COUNT(success_replies, child_count) == child_count)) {
+ *read_subvol = spb_subvol;
+ } else if (!priv->quorum_count ||
+ frame->root->pid == GF_CLIENT_PID_GLFS_HEAL) {
+ *read_subvol = afr_first_up_child(frame, this);
+ } else if (priv->quorum_count &&
+ afr_has_quorum(data_readable, this, NULL)) {
+ /* read_subvol is guaranteed to be valid if we hit this path. */
+ *read_subvol = afr_first_up_child(frame, this);
+ } else {
+ /* If quorum is enabled and we do not have a
+ readable yet, it means all good copies are down.
+ */
+ local->op_ret = -1;
+ local->op_errno = ENOTCONN;
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_READ_SUBVOL_ERROR,
+ "no read "
+ "subvols for %s",
+ local->loc.path);
+ }
+ if (*read_subvol >= 0)
+ dict_del_sizen(local->replies[*read_subvol].xdata, GF_CONTENT_KEY);
+}
- for (i = 0; i < priv->child_count; i++) {
- if (!replies[i].valid)
- continue;
+static void
+afr_lookup_done(call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int i = -1;
+ int op_errno = 0;
+ int read_subvol = 0;
+ int par_read_subvol = 0;
+ int ret = -1;
+ unsigned char *readable = NULL;
+ unsigned char *success_replies = NULL;
+ int event = 0;
+ struct afr_reply *replies = NULL;
+ uuid_t read_gfid = {
+ 0,
+ };
+ gf_boolean_t locked_entry = _gf_false;
+ gf_boolean_t in_flight_create = _gf_false;
+ gf_boolean_t can_interpret = _gf_true;
+ inode_t *parent = NULL;
+ ia_type_t ia_type = IA_INVAL;
+ afr_read_subvol_args_t args = {
+ 0,
+ };
+ char *gfid_heal_msg = NULL;
+
+ priv = this->private;
+ local = frame->local;
+ replies = local->replies;
+ parent = local->loc.parent;
+
+ locked_entry = afr_is_possibly_under_txn(AFR_ENTRY_TRANSACTION, local,
+ this);
+
+ readable = alloca0(priv->child_count);
+ success_replies = alloca0(priv->child_count);
+
+ afr_inode_read_subvol_get(parent, this, readable, NULL, &event);
+ par_read_subvol = afr_get_parent_read_subvol(this, parent, replies,
+ readable);
+
+ /* First, check if we have a gfid-change from somewhere,
+ If so, propagate that so that a fresh lookup can be
+ issued
+ */
+ if (local->cont.lookup.needs_fresh_lookup) {
+ local->op_ret = -1;
+ local->op_errno = ESTALE;
+ goto error;
+ }
- if (replies[i].op_ret < 0)
- continue;
+ op_errno = afr_final_errno(frame->local, this->private);
+ local->op_errno = op_errno;
- if (par_read_subvol_iter == -1) {
- par_read_subvol_iter = i;
- continue;
- }
+ read_subvol = -1;
+ afr_fill_success_replies(local, priv, success_replies);
- if ((par_read_subvol_iter != par_read_subvol) && readable[i])
- par_read_subvol_iter = i;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
- if (i == par_read_subvol)
- par_read_subvol_iter = i;
+ if (replies[i].op_ret == -1) {
+ if (locked_entry && replies[i].op_errno == ENOENT) {
+ in_flight_create = _gf_true;
+ }
+ continue;
}
- /* At the end of the for-loop, the only reason why @par_read_subvol_iter
- * could be -1 is when this LOOKUP has failed on all sub-volumes.
- * So it is okay to send an arbitrary subvolume (0 in this case)
- * as parent read subvol.
- */
- if (par_read_subvol_iter == -1)
- par_read_subvol_iter = 0;
-
- return par_read_subvol_iter;
-
-}
-static void
-afr_lookup_done (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int i = -1;
- int op_errno = 0;
- int read_subvol = 0;
- int par_read_subvol = 0;
- unsigned char *readable = NULL;
- int event = 0;
- struct afr_reply *replies = NULL;
- uuid_t read_gfid = {0, };
- gf_boolean_t locked_entry = _gf_false;
- gf_boolean_t can_interpret = _gf_true;
- inode_t *parent = NULL;
- int spb_choice = -1;
-
- priv = this->private;
- local = frame->local;
- replies = local->replies;
- parent = local->loc.parent;
-
- locked_entry = afr_is_entry_possibly_under_txn (local, this);
-
- readable = alloca0 (priv->child_count);
-
- afr_inode_read_subvol_get (parent, this, readable, NULL, &event);
-
- afr_inode_split_brain_choice_get (local->inode, this,
- &spb_choice);
- /* First, check if we have a gfid-change from somewhere,
- If so, propagate that so that a fresh lookup can be
- issued
- */
- if (local->cont.lookup.needs_fresh_lookup) {
- local->op_ret = -1;
- local->op_errno = ESTALE;
- goto unwind;
- }
-
- op_errno = afr_final_errno (frame->local, this->private);
- local->op_errno = op_errno;
-
- read_subvol = -1;
- for (i = 0; i < priv->child_count; i++) {
- if (!replies[i].valid)
- continue;
-
- if (locked_entry && replies[i].op_ret == -1 &&
- replies[i].op_errno == ENOENT) {
- /* Second, check entry is still
- "underway" in creation */
- local->op_ret = -1;
- local->op_errno = ENOENT;
- goto unwind;
- }
-
- if (replies[i].op_ret == -1)
- continue;
-
- if (read_subvol == -1 || !readable[read_subvol]) {
- read_subvol = i;
- gf_uuid_copy (read_gfid, replies[i].poststat.ia_gfid);
- local->op_ret = 0;
- }
- }
-
- if (read_subvol == -1)
- goto unwind;
- /* We now have a read_subvol, which is readable[] (if there
- were any). Next we look for GFID mismatches. We don't
- consider a GFID mismatch as an error if read_subvol is
- readable[] but the mismatching GFID subvol is not.
- */
- for (i = 0; i < priv->child_count; i++) {
- if (!replies[i].valid || replies[i].op_ret == -1) {
- if (priv->child_up[i])
- can_interpret = _gf_false;
- continue;
- }
-
- if (!gf_uuid_compare (replies[i].poststat.ia_gfid, read_gfid))
- continue;
-
- can_interpret = _gf_false;
-
- if (locked_entry)
- continue;
-
- /* Now GFIDs mismatch. It's OK as long as this subvol
- is not readable[] but read_subvol is */
- if (readable[read_subvol] && !readable[i])
- continue;
-
- /* LOG ERROR */
- local->op_ret = -1;
- local->op_errno = EIO;
- goto unwind;
- }
-
- /* Forth, for the finalized GFID, pick the best subvolume
- to return stats from.
- */
- if (can_interpret) {
- /* It is safe to call afr_replies_interpret() because we have
- a response from all the UP subvolumes and all of them resolved
- to the same GFID
- */
- if (afr_replies_interpret (frame, this, local->inode)) {
- read_subvol = afr_data_subvol_get (local->inode, this,
- 0, 0);
- afr_inode_read_subvol_reset (local->inode, this);
- goto cant_interpret;
- } else {
- read_subvol = afr_data_subvol_get (local->inode, this,
- 0, 0);
- }
- } else {
- cant_interpret:
- if (read_subvol == -1) {
- if (spb_choice >= 0)
- read_subvol = spb_choice;
- else
- read_subvol = 0;
- }
- dict_del (replies[read_subvol].xdata, GF_CONTENT_KEY);
- }
-
- afr_handle_quota_size (frame, this);
+ if (read_subvol == -1 || !readable[read_subvol]) {
+ read_subvol = i;
+ gf_uuid_copy(read_gfid, replies[i].poststat.ia_gfid);
+ ia_type = replies[i].poststat.ia_type;
+ local->op_ret = 0;
+ }
+ }
-unwind:
+ if (in_flight_create && !afr_has_quorum(success_replies, this, NULL)) {
+ local->op_ret = -1;
+ local->op_errno = ENOENT;
+ goto error;
+ }
+
+ if (read_subvol == -1)
+ goto error;
+ /* We now have a read_subvol, which is readable[] (if there
+ were any). Next we look for GFID mismatches. We don't
+ consider a GFID mismatch as an error if read_subvol is
+ readable[] but the mismatching GFID subvol is not.
+ */
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret == -1) {
+ continue;
+ }
+
+ if (!gf_uuid_compare(replies[i].poststat.ia_gfid, read_gfid))
+ continue;
+
+ can_interpret = _gf_false;
+
+ if (locked_entry)
+ continue;
+
+ /* Now GFIDs mismatch. It's OK as long as this subvol
+ is not readable[] but read_subvol is */
+ if (readable[read_subvol] && !readable[i])
+ continue;
+
+ /* If we were called from glfsheal and there is still a gfid
+ * mismatch, succeed the lookup and let glfsheal print the
+ * response via gfid-heal-msg.*/
+ if (!dict_get_str_sizen(local->xattr_req, "gfid-heal-msg",
+ &gfid_heal_msg))
+ goto cant_interpret;
+
+ /* LOG ERROR */
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto error;
+ }
+
+ /* Forth, for the finalized GFID, pick the best subvolume
+ to return stats from.
+ */
+ read_subvol = -1;
+ memset(readable, 0, sizeof(*readable) * priv->child_count);
+ if (can_interpret) {
+ if (!afr_has_quorum(success_replies, this, NULL))
+ goto cant_interpret;
+ /* It is safe to call afr_replies_interpret() because we have
+ a response from all the UP subvolumes and all of them resolved
+ to the same GFID
+ */
+ gf_uuid_copy(args.gfid, read_gfid);
+ args.ia_type = ia_type;
+ ret = afr_replies_interpret(frame, this, local->inode, NULL);
+ read_subvol = afr_read_subvol_decide(local->inode, this, &args,
+ readable);
+ if (read_subvol == -1)
+ goto cant_interpret;
+ if (ret) {
+ afr_inode_need_refresh_set(local->inode, this);
+ dict_del_sizen(local->replies[read_subvol].xdata, GF_CONTENT_KEY);
+ }
+ } else {
+ cant_interpret:
+ afr_attempt_readsubvol_set(frame, this, success_replies, readable,
+ &read_subvol);
if (read_subvol == -1) {
- if (spb_choice >= 0)
- read_subvol = spb_choice;
- else
- read_subvol = 0;
+ goto error;
}
- par_read_subvol = afr_get_parent_read_subvol (this, parent, replies,
- readable);
+ }
- AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
- local->inode, &local->replies[read_subvol].poststat,
- local->replies[read_subvol].xdata,
- &local->replies[par_read_subvol].postparent);
+ afr_handle_quota_size(frame, this);
+
+ afr_set_need_heal(this, local);
+ if (AFR_IS_ARBITER_BRICK(priv, read_subvol) && local->op_ret == 0) {
+ local->op_ret = -1;
+ local->op_errno = ENOTCONN;
+ gf_msg_debug(this->name, 0,
+ "Arbiter cannot be a read subvol "
+ "for %s",
+ local->loc.path);
+ goto error;
+ }
+
+ ret = dict_get_str_sizen(local->xattr_req, "gfid-heal-msg", &gfid_heal_msg);
+ if (!ret) {
+ ret = dict_set_str_sizen(local->replies[read_subvol].xdata,
+ "gfid-heal-msg", gfid_heal_msg);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_DICT_SET_FAILED,
+ "Error setting gfid-heal-msg dict");
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ }
+ }
+
+ AFR_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno,
+ local->inode, &local->replies[read_subvol].poststat,
+ local->replies[read_subvol].xdata,
+ &local->replies[par_read_subvol].postparent);
+ return;
+
+error:
+ AFR_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno, NULL, NULL,
+ NULL, NULL);
}
/*
@@ -1479,606 +3122,845 @@ unwind:
* others in that they must be given higher priority while
* returning to the user.
*
- * The hierarchy is ENODATA > ENOENT > ESTALE > others
+ * The hierarchy is ENODATA > ENOENT > ESTALE > ENOSPC others
*/
int
-afr_higher_errno (int32_t old_errno, int32_t new_errno)
+afr_higher_errno(int32_t old_errno, int32_t new_errno)
{
- if (old_errno == ENODATA || new_errno == ENODATA)
- return ENODATA;
- if (old_errno == ENOENT || new_errno == ENOENT)
- return ENOENT;
- if (old_errno == ESTALE || new_errno == ESTALE)
- return ESTALE;
-
- return new_errno;
+ if (old_errno == ENODATA || new_errno == ENODATA)
+ return ENODATA;
+ if (old_errno == ENOENT || new_errno == ENOENT)
+ return ENOENT;
+ if (old_errno == ESTALE || new_errno == ESTALE)
+ return ESTALE;
+ if (old_errno == ENOSPC || new_errno == ENOSPC)
+ return ENOSPC;
+
+ return new_errno;
}
-
int
-afr_final_errno (afr_local_t *local, afr_private_t *priv)
+afr_final_errno(afr_local_t *local, afr_private_t *priv)
{
- int i = 0;
- int op_errno = 0;
- int tmp_errno = 0;
-
- for (i = 0; i < priv->child_count; i++) {
- if (!local->replies[i].valid)
- continue;
- if (local->replies[i].op_ret == 0)
- continue;
- tmp_errno = local->replies[i].op_errno;
- op_errno = afr_higher_errno (op_errno, tmp_errno);
- }
-
- return op_errno;
+ int i = 0;
+ int op_errno = 0;
+ int tmp_errno = 0;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
+ if (local->replies[i].op_ret >= 0)
+ continue;
+ tmp_errno = local->replies[i].op_errno;
+ op_errno = afr_higher_errno(op_errno, tmp_errno);
+ }
+
+ return op_errno;
}
static int32_t
-afr_local_discovery_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict,
- dict_t *xdata)
+afr_local_discovery_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
- int ret = 0;
- char *pathinfo = NULL;
- gf_boolean_t is_local = _gf_false;
- afr_private_t *priv = NULL;
- int32_t child_index = -1;
-
- if (op_ret != 0) {
- goto out;
- }
-
- priv = this->private;
- child_index = (int32_t)(long)cookie;
-
- ret = dict_get_str (dict, GF_XATTR_PATHINFO_KEY, &pathinfo);
- if (ret != 0) {
- goto out;
- }
-
- ret = glusterfs_is_local_pathinfo (pathinfo, &is_local);
- if (ret) {
- goto out;
- }
-
- /*
- * Note that one local subvolume will override another here. The only
- * way to avoid that would be to retain extra information about whether
- * the previous read_child is local, and it's just not worth it. Even
- * the slowest local subvolume is far preferable to a remote one.
- */
- if (is_local) {
- gf_log (this->name, GF_LOG_INFO,
- "selecting local read_child %s",
- priv->children[child_index]->name);
- priv->read_child = child_index;
- }
+ int ret = 0;
+ char *pathinfo = NULL;
+ gf_boolean_t is_local = _gf_false;
+ afr_private_t *priv = NULL;
+ int32_t child_index = -1;
+
+ if (op_ret != 0) {
+ goto out;
+ }
+
+ priv = this->private;
+ child_index = (int32_t)(long)cookie;
+
+ ret = dict_get_str_sizen(dict, GF_XATTR_PATHINFO_KEY, &pathinfo);
+ if (ret != 0) {
+ goto out;
+ }
+
+ ret = glusterfs_is_local_pathinfo(pathinfo, &is_local);
+ if (ret) {
+ goto out;
+ }
+
+ /*
+ * Note that one local subvolume will override another here. The only
+ * way to avoid that would be to retain extra information about whether
+ * the previous read_child is local, and it's just not worth it. Even
+ * the slowest local subvolume is far preferable to a remote one.
+ */
+ if (is_local) {
+ priv->local[child_index] = 1;
+ /* Don't set arbiter as read child. */
+ if (AFR_IS_ARBITER_BRICK(priv, child_index))
+ goto out;
+ gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_LOCAL_CHILD,
+ "selecting local read_child %s",
+ priv->children[child_index]->name);
+
+ priv->read_child = child_index;
+ }
out:
- STACK_DESTROY(frame->root);
- return 0;
+ STACK_DESTROY(frame->root);
+ return 0;
}
static void
-afr_attempt_local_discovery (xlator_t *this, int32_t child_index)
+afr_attempt_local_discovery(xlator_t *this, int32_t child_index)
{
- call_frame_t *newframe = NULL;
- loc_t tmploc = {0,};
- afr_private_t *priv = this->private;
-
- newframe = create_frame(this,this->ctx->pool);
- if (!newframe) {
- return;
- }
+ call_frame_t *newframe = NULL;
+ loc_t tmploc = {
+ 0,
+ };
+ afr_private_t *priv = this->private;
+
+ newframe = create_frame(this, this->ctx->pool);
+ if (!newframe) {
+ return;
+ }
- tmploc.gfid[sizeof(tmploc.gfid)-1] = 1;
- STACK_WIND_COOKIE (newframe, afr_local_discovery_cbk,
- (void *)(long)child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->getxattr,
- &tmploc, GF_XATTR_PATHINFO_KEY, NULL);
+ tmploc.gfid[sizeof(tmploc.gfid) - 1] = 1;
+ STACK_WIND_COOKIE(newframe, afr_local_discovery_cbk,
+ (void *)(long)child_index, priv->children[child_index],
+ priv->children[child_index]->fops->getxattr, &tmploc,
+ GF_XATTR_PATHINFO_KEY, NULL);
}
int
-afr_lookup_sh_metadata_wrap (void *opaque)
+afr_lookup_sh_metadata_wrap(void *opaque)
{
- call_frame_t *frame = opaque;
- afr_local_t *local = NULL;
- xlator_t *this = NULL;
- inode_t *inode = NULL;
- afr_private_t *priv = NULL;
- struct afr_reply *replies = NULL;
- int i= 0, first = -1;
+ call_frame_t *frame = opaque;
+ afr_local_t *local = NULL;
+ xlator_t *this = NULL;
+ inode_t *inode = NULL;
+ afr_private_t *priv = NULL;
+ struct afr_reply *replies = NULL;
+ int i = 0, first = -1;
+ int ret = -1;
+ dict_t *dict = NULL;
+
+ local = frame->local;
+ this = frame->this;
+ priv = this->private;
+ replies = local->replies;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret == -1)
+ continue;
+ first = i;
+ break;
+ }
+ if (first == -1)
+ goto out;
+
+ if (afr_selfheal_metadata_by_stbuf(this, &replies[first].poststat))
+ goto out;
+
+ afr_local_replies_wipe(local, this->private);
+
+ dict = dict_new();
+ if (!dict)
+ goto out;
+ if (local->xattr_req) {
+ dict_copy(local->xattr_req, dict);
+ }
+
+ ret = dict_set_sizen_str_sizen(dict, "link-count", GF_XATTROP_INDEX_COUNT);
+ if (ret) {
+ gf_msg_debug(this->name, -ret, "Unable to set link-count in dict ");
+ }
+
+ if (loc_is_nameless(&local->loc)) {
+ ret = afr_selfheal_unlocked_discover_on(frame, local->inode,
+ local->loc.gfid, local->replies,
+ local->child_up, dict);
+ } else {
+ inode = afr_selfheal_unlocked_lookup_on(frame, local->loc.parent,
+ local->loc.name, local->replies,
+ local->child_up, dict);
+ }
+ if (inode)
+ inode_unref(inode);
+out:
+ if (loc_is_nameless(&local->loc))
+ afr_discover_done(frame, this);
+ else
+ afr_lookup_done(frame, this);
- local = frame->local;
- this = frame->this;
- priv = this->private;
- replies = local->replies;
-
- for (i =0; i < priv->child_count; i++) {
- if(!replies[i].valid || replies[i].op_ret == -1)
- continue;
- first = i;
- break;
- }
- if (first == -1)
- goto out;
+ if (dict)
+ dict_unref(dict);
- inode = afr_inode_link (local->inode,&replies[first].poststat);
- if(!inode)
- goto out;
+ return 0;
+}
- afr_selfheal_metadata (frame, this, inode);
- inode_forget (inode, 1);
- inode_unref (inode);
+gf_boolean_t
+afr_is_pending_set(xlator_t *this, dict_t *xdata, int type)
+{
+ int idx = -1;
+ afr_private_t *priv = NULL;
+ void *pending_raw = NULL;
+ int *pending_int = NULL;
+ int i = 0;
- afr_local_replies_wipe (local, this->private);
- inode = afr_selfheal_unlocked_lookup_on (frame, local->loc.parent,
- local->loc.name, local->replies,
- local->child_up, NULL);
- if (inode)
- inode_unref (inode);
-out:
- afr_lookup_done (frame, this);
+ priv = this->private;
+ idx = afr_index_for_transaction_type(type);
- return 0;
+ if (dict_get_ptr(xdata, AFR_DIRTY, &pending_raw) == 0) {
+ if (pending_raw) {
+ pending_int = pending_raw;
+
+ if (ntoh32(pending_int[idx]))
+ return _gf_true;
+ }
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (dict_get_ptr(xdata, priv->pending_key[i], &pending_raw))
+ continue;
+ if (!pending_raw)
+ continue;
+ pending_int = pending_raw;
+
+ if (ntoh32(pending_int[idx]))
+ return _gf_true;
+ }
+
+ return _gf_false;
}
static gf_boolean_t
afr_can_start_metadata_self_heal(call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- struct afr_reply *replies = NULL;
- int i = 0, first = -1;
- gf_boolean_t start = _gf_false;
- struct iatt stbuf = {0, };
-
- local = frame->local;
- replies = local->replies;
- priv = this->private;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ struct afr_reply *replies = NULL;
+ int i = 0, first = -1;
+ gf_boolean_t start = _gf_false;
+ struct iatt stbuf = {
+ 0,
+ };
+
+ local = frame->local;
+ replies = local->replies;
+ priv = this->private;
+
+ if (!priv->metadata_self_heal)
+ return _gf_false;
- for (i = 0; i < priv->child_count; i++) {
- if(!replies[i].valid || replies[i].op_ret == -1)
- continue;
- if (first == -1) {
- first = i;
- stbuf = replies[i].poststat;
- continue;
- }
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret == -1)
+ continue;
+ if (first == -1) {
+ first = i;
+ stbuf = replies[i].poststat;
+ continue;
+ }
- if (gf_uuid_compare (stbuf.ia_gfid, replies[i].poststat.ia_gfid)) {
- start = _gf_false;
- break;
- }
- if (!IA_EQUAL (stbuf, replies[i].poststat, type)) {
- start = _gf_false;
- break;
- }
+ if (afr_is_pending_set(this, replies[i].xdata,
+ AFR_METADATA_TRANSACTION)) {
+ /* Let shd do the heal so that lookup is not blocked
+ * on getting metadata lock/doing the heal */
+ start = _gf_false;
+ break;
+ }
- /*Check if iattrs need heal*/
- if ((!IA_EQUAL (stbuf, replies[i].poststat, uid)) ||
- (!IA_EQUAL (stbuf, replies[i].poststat, gid)) ||
- (!IA_EQUAL (stbuf, replies[i].poststat, prot))) {
- start = _gf_true;
- continue;
- }
+ if (gf_uuid_compare(stbuf.ia_gfid, replies[i].poststat.ia_gfid)) {
+ start = _gf_false;
+ break;
+ }
+ if (!IA_EQUAL(stbuf, replies[i].poststat, type)) {
+ start = _gf_false;
+ break;
+ }
- /*Check if xattrs need heal*/
- if (!afr_xattrs_are_equal (replies[first].xdata,
- replies[i].xdata))
- start = _gf_true;
+ /*Check if iattrs need heal*/
+ if ((!IA_EQUAL(stbuf, replies[i].poststat, uid)) ||
+ (!IA_EQUAL(stbuf, replies[i].poststat, gid)) ||
+ (!IA_EQUAL(stbuf, replies[i].poststat, prot))) {
+ start = _gf_true;
+ continue;
}
- return start;
+ /*Check if xattrs need heal*/
+ if (!afr_xattrs_are_equal(replies[first].xdata, replies[i].xdata))
+ start = _gf_true;
+ }
+
+ return start;
}
int
-afr_lookup_metadata_heal_check (call_frame_t *frame, xlator_t *this)
+afr_lookup_metadata_heal_check(call_frame_t *frame, xlator_t *this)
{
- call_frame_t *heal = NULL;
- int ret = 0;
-
- if (!afr_can_start_metadata_self_heal (frame, this))
- goto out;
-
- heal = copy_frame (frame);
- if (heal)
- heal->root->pid = GF_CLIENT_PID_AFR_SELF_HEALD;
- ret = synctask_new (this->ctx->env, afr_lookup_sh_metadata_wrap,
- afr_refresh_selfheal_done, heal, frame);
- if(ret)
- goto out;
- return ret;
+ call_frame_t *heal = NULL;
+ afr_local_t *local = NULL;
+ int ret = 0;
+
+ local = frame->local;
+ if (!afr_can_start_metadata_self_heal(frame, this))
+ goto out;
+
+ heal = afr_frame_create(this, &ret);
+ if (!heal) {
+ ret = -ret;
+ goto out;
+ }
+
+ ret = synctask_new(this->ctx->env, afr_lookup_sh_metadata_wrap,
+ afr_refresh_selfheal_done, heal, frame);
+ if (ret)
+ goto out;
+ return ret;
out:
- afr_lookup_done (frame, this);
- return ret;
+ if (loc_is_nameless(&local->loc))
+ afr_discover_done(frame, this);
+ else
+ afr_lookup_done(frame, this);
+ if (heal)
+ AFR_STACK_DESTROY(heal);
+ return ret;
}
int
-afr_lookup_selfheal_wrap (void *opaque)
+afr_lookup_selfheal_wrap(void *opaque)
{
- int ret = 0;
- call_frame_t *frame = opaque;
- afr_local_t *local = NULL;
- xlator_t *this = NULL;
- inode_t *inode = NULL;
+ int ret = 0;
+ call_frame_t *frame = opaque;
+ afr_local_t *local = NULL;
+ xlator_t *this = NULL;
+ inode_t *inode = NULL;
+ uuid_t pargfid = {
+ 0,
+ };
+
+ local = frame->local;
+ this = frame->this;
+ loc_pargfid(&local->loc, pargfid);
+
+ ret = afr_selfheal_name(frame->this, pargfid, local->loc.name,
+ &local->cont.lookup.gfid_req, local->xattr_req);
+ if (ret == -EIO)
+ goto unwind;
+
+ afr_local_replies_wipe(local, this->private);
+
+ inode = afr_selfheal_unlocked_lookup_on(frame, local->loc.parent,
+ local->loc.name, local->replies,
+ local->child_up, local->xattr_req);
+ if (inode)
+ inode_unref(inode);
+
+ afr_lookup_metadata_heal_check(frame, this);
+ return 0;
- local = frame->local;
- this = frame->this;
+unwind:
+ AFR_STACK_UNWIND(lookup, frame, -1, EIO, NULL, NULL, NULL, NULL);
+ return 0;
+}
- ret = afr_selfheal_name (frame->this, local->loc.pargfid,
- local->loc.name, &local->cont.lookup.gfid_req);
- if (ret == -EIO)
- goto unwind;
+int
+afr_lookup_entry_heal(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ call_frame_t *heal = NULL;
+ int i = 0, first = -1;
+ gf_boolean_t name_state_mismatch = _gf_false;
+ struct afr_reply *replies = NULL;
+ int ret = 0;
+ unsigned char *par_readables = NULL;
+ unsigned char *success = NULL;
+ int32_t op_errno = 0;
+ uuid_t gfid = {0};
+
+ local = frame->local;
+ replies = local->replies;
+ priv = this->private;
+ par_readables = alloca0(priv->child_count);
+ success = alloca0(priv->child_count);
+
+ ret = afr_inode_read_subvol_get(local->loc.parent, this, par_readables,
+ NULL, NULL);
+ if (ret < 0 || AFR_COUNT(par_readables, priv->child_count) == 0) {
+ /* In this case set par_readables to all 1 so that name_heal
+ * need checks at the end of this function will flag missing
+ * entry when name state mismatches*/
+ memset(par_readables, 1, priv->child_count);
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
+
+ if (replies[i].op_ret == 0) {
+ if (gf_uuid_is_null(gfid)) {
+ gf_uuid_copy(gfid, replies[i].poststat.ia_gfid);
+ }
+ success[i] = 1;
+ } else {
+ if ((replies[i].op_errno != ENOTCONN) &&
+ (replies[i].op_errno != ENOENT) &&
+ (replies[i].op_errno != ESTALE)) {
+ op_errno = replies[i].op_errno;
+ }
+ }
- afr_local_replies_wipe (local, this->private);
+ /*gfid is missing, needs heal*/
+ if ((replies[i].op_ret == -1) && (replies[i].op_errno == ENODATA)) {
+ goto name_heal;
+ }
- inode = afr_selfheal_unlocked_lookup_on (frame, local->loc.parent,
- local->loc.name, local->replies,
- local->child_up, NULL);
- if (inode)
- inode_unref (inode);
+ if (first == -1) {
+ first = i;
+ continue;
+ }
- afr_lookup_metadata_heal_check(frame, this);
- return 0;
+ if (replies[i].op_ret != replies[first].op_ret) {
+ name_state_mismatch = _gf_true;
+ }
-unwind:
- AFR_STACK_UNWIND (lookup, frame, -1, EIO, NULL, NULL, NULL, NULL);
- return 0;
-}
+ if (replies[i].op_ret == 0) {
+ /* Rename after this lookup may succeed if we don't do
+ * a name-heal and the destination may not have pending xattrs
+ * to indicate which name is good and which is bad so always do
+ * this heal*/
+ if (gf_uuid_compare(replies[i].poststat.ia_gfid, gfid)) {
+ goto name_heal;
+ }
+ }
+ }
+ if (name_state_mismatch) {
+ if (!priv->quorum_count)
+ goto name_heal;
+ if (!afr_has_quorum(success, this, NULL))
+ goto name_heal;
+ if (op_errno)
+ goto name_heal;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
+ if (par_readables[i] && replies[i].op_ret < 0 &&
+ replies[i].op_errno != ENOTCONN) {
+ goto name_heal;
+ }
+ }
+ }
+
+ goto metadata_heal;
+
+name_heal:
+ heal = afr_frame_create(this, NULL);
+ if (!heal)
+ goto metadata_heal;
+
+ ret = synctask_new(this->ctx->env, afr_lookup_selfheal_wrap,
+ afr_refresh_selfheal_done, heal, frame);
+ if (ret) {
+ AFR_STACK_DESTROY(heal);
+ goto metadata_heal;
+ }
+ return ret;
-int
-afr_lookup_entry_heal (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- call_frame_t *heal = NULL;
- int i = 0, first = -1;
- gf_boolean_t need_heal = _gf_false;
- struct afr_reply *replies = NULL;
- int ret = 0;
-
- local = frame->local;
- replies = local->replies;
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++) {
- if (!replies[i].valid)
- continue;
-
- if ((replies[i].op_ret == -1) &&
- (replies[i].op_errno == ENODATA))
- need_heal = _gf_true;
-
- if (first == -1) {
- first = i;
- continue;
- }
-
- if (replies[i].op_ret != replies[first].op_ret) {
- need_heal = _gf_true;
- break;
- }
-
- if (gf_uuid_compare (replies[i].poststat.ia_gfid,
- replies[first].poststat.ia_gfid)) {
- need_heal = _gf_true;
- break;
- }
- }
-
- if (need_heal) {
- heal = copy_frame (frame);
- if (heal)
- heal->root->pid = GF_CLIENT_PID_AFR_SELF_HEALD;
- ret = synctask_new (this->ctx->env, afr_lookup_selfheal_wrap,
- afr_refresh_selfheal_done, heal, frame);
- if (ret)
- goto metadata_heal;
- return ret;
- }
metadata_heal:
- ret = afr_lookup_metadata_heal_check (frame, this);
+ ret = afr_lookup_metadata_heal_check(frame, this);
- return ret;
+ return ret;
}
-
int
-afr_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, inode_t *inode, struct iatt *buf,
- dict_t *xdata, struct iatt *postparent)
+afr_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, inode_t *inode, struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent)
{
- afr_local_t * local = NULL;
- int call_count = -1;
- int child_index = -1;
-
- child_index = (long) cookie;
-
- local = frame->local;
-
- local->replies[child_index].valid = 1;
- local->replies[child_index].op_ret = op_ret;
- local->replies[child_index].op_errno = op_errno;
- /*
- * On revalidate lookup if the gfid-changed, afr should unwind the fop
- * with ESTALE so that a fresh lookup will be sent by the top xlator.
- * So remember it.
- */
- if (xdata && dict_get (xdata, "gfid-changed"))
- local->cont.lookup.needs_fresh_lookup = _gf_true;
+ afr_local_t *local = NULL;
+ int call_count = -1;
+ int child_index = -1;
+ GF_UNUSED int ret = 0;
+ int8_t need_heal = 1;
+
+ child_index = (long)cookie;
+
+ local = frame->local;
+
+ local->replies[child_index].valid = 1;
+ local->replies[child_index].op_ret = op_ret;
+ local->replies[child_index].op_errno = op_errno;
+ /*
+ * On revalidate lookup if the gfid-changed, afr should unwind the fop
+ * with ESTALE so that a fresh lookup will be sent by the top xlator.
+ * So remember it.
+ */
+ if (xdata && dict_get_sizen(xdata, "gfid-changed"))
+ local->cont.lookup.needs_fresh_lookup = _gf_true;
+
+ if (xdata) {
+ ret = dict_get_int8(xdata, "link-count", &need_heal);
+ local->replies[child_index].need_heal = need_heal;
+ } else {
+ local->replies[child_index].need_heal = need_heal;
+ }
+ if (op_ret != -1) {
+ local->replies[child_index].poststat = *buf;
+ local->replies[child_index].postparent = *postparent;
+ if (xdata)
+ local->replies[child_index].xdata = dict_ref(xdata);
+ }
- if (op_ret != -1) {
- local->replies[child_index].poststat = *buf;
- local->replies[child_index].postparent = *postparent;
- if (xdata)
- local->replies[child_index].xdata = dict_ref (xdata);
- }
+ call_count = afr_frame_return(frame);
+ if (call_count == 0) {
+ afr_set_need_heal(this, local);
+ afr_lookup_entry_heal(frame, this);
+ }
- call_count = afr_frame_return (frame);
- if (call_count == 0) {
- afr_lookup_entry_heal (frame, this);
- }
-
- return 0;
+ return 0;
}
-
-
static void
-afr_discover_done (call_frame_t *frame, xlator_t *this)
+afr_discover_unwind(call_frame_t *frame, xlator_t *this)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int i = -1;
- int op_errno = 0;
- int read_subvol = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int read_subvol = -1;
+ int ret = 0;
+ unsigned char *data_readable = NULL;
+ unsigned char *success_replies = NULL;
+
+ priv = this->private;
+ local = frame->local;
+ data_readable = alloca0(priv->child_count);
+ success_replies = alloca0(priv->child_count);
+
+ afr_fill_success_replies(local, priv, success_replies);
+ if (AFR_COUNT(success_replies, priv->child_count) > 0)
+ local->op_ret = 0;
+
+ if (local->op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = afr_final_errno(frame->local, this->private);
+ goto error;
+ }
- priv = this->private;
- local = frame->local;
+ if (!afr_has_quorum(success_replies, this, frame))
+ goto unwind;
- for (i = 0; i < priv->child_count; i++) {
- if (!local->replies[i].valid)
- continue;
- if (local->replies[i].op_ret == 0)
- local->op_ret = 0;
- }
-
- op_errno = afr_final_errno (frame->local, this->private);
-
- if (local->op_ret < 0) {
- local->op_errno = op_errno;
- local->op_ret = -1;
- goto unwind;
- }
-
- afr_replies_interpret (frame, this, local->inode);
-
- read_subvol = afr_data_subvol_get (local->inode, this, 0, 0);
- if (read_subvol == -1) {
- gf_log (this->name, GF_LOG_WARNING, "no read subvols for %s",
- local->loc.path);
-
- for (i = 0; i < priv->child_count; i++) {
- if (!local->replies[i].valid ||
- local->replies[i].op_ret == -1)
- continue;
- read_subvol = i;
- break;
- }
- }
+ ret = afr_replies_interpret(frame, this, local->inode, NULL);
+ if (ret) {
+ afr_inode_need_refresh_set(local->inode, this);
+ }
+
+ read_subvol = afr_read_subvol_decide(local->inode, this, NULL,
+ data_readable);
unwind:
- if (read_subvol == -1) {
- afr_inode_split_brain_choice_get (local->inode, this,
- &read_subvol);
- if (read_subvol == -1)
- read_subvol = 0;
- }
+ afr_attempt_readsubvol_set(frame, this, success_replies, data_readable,
+ &read_subvol);
+ if (read_subvol == -1)
+ goto error;
- AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
- local->inode, &local->replies[read_subvol].poststat,
- local->replies[read_subvol].xdata,
- &local->replies[read_subvol].postparent);
+ if (AFR_IS_ARBITER_BRICK(priv, read_subvol) && local->op_ret == 0) {
+ local->op_ret = -1;
+ local->op_errno = ENOTCONN;
+ gf_msg_debug(this->name, 0,
+ "Arbiter cannot be a read subvol "
+ "for %s",
+ local->loc.path);
+ }
+
+ AFR_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno,
+ local->inode, &local->replies[read_subvol].poststat,
+ local->replies[read_subvol].xdata,
+ &local->replies[read_subvol].postparent);
+ return;
+
+error:
+ AFR_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno, NULL, NULL,
+ NULL, NULL);
}
-
-int
-afr_discover_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, inode_t *inode, struct iatt *buf,
- dict_t *xdata, struct iatt *postparent)
+static int
+afr_ta_id_file_check(void *opaque)
{
- afr_local_t * local = NULL;
- int call_count = -1;
- int child_index = -1;
+ afr_private_t *priv = NULL;
+ xlator_t *this = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt stbuf = {
+ 0,
+ };
+ dict_t *dict = NULL;
+ uuid_t gfid = {
+ 0,
+ };
+ fd_t *fd = NULL;
+ int ret = 0;
+
+ this = opaque;
+ priv = this->private;
+
+ ret = afr_fill_ta_loc(this, &loc, _gf_false);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Failed to populate thin-arbiter loc for: %s.", loc.name);
+ goto out;
+ }
+
+ ret = syncop_lookup(priv->children[THIN_ARBITER_BRICK_INDEX], &loc, &stbuf,
+ 0, 0, 0);
+ if (ret == 0) {
+ goto out;
+ } else if (ret == -ENOENT) {
+ fd = fd_create(loc.inode, getpid());
+ if (!fd)
+ goto out;
+ dict = dict_new();
+ if (!dict)
+ goto out;
+ gf_uuid_generate(gfid);
+ ret = dict_set_gfuuid(dict, "gfid-req", gfid, true);
+ ret = syncop_create(priv->children[THIN_ARBITER_BRICK_INDEX], &loc,
+ O_RDWR, 0664, fd, &stbuf, dict, NULL);
+ }
- child_index = (long) cookie;
-
- local = frame->local;
-
- local->replies[child_index].valid = 1;
- local->replies[child_index].op_ret = op_ret;
- local->replies[child_index].op_errno = op_errno;
- if (op_ret != -1) {
- local->replies[child_index].poststat = *buf;
- local->replies[child_index].postparent = *postparent;
- if (xdata)
- local->replies[child_index].xdata = dict_ref (xdata);
- }
-
- if (local->do_discovery && (op_ret == 0))
- afr_attempt_local_discovery (this, child_index);
-
- call_count = afr_frame_return (frame);
- if (call_count == 0) {
- afr_discover_done (frame, this);
- }
+out:
+ if (ret == 0) {
+ gf_uuid_copy(priv->ta_gfid, stbuf.ia_gfid);
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Failed to lookup/create thin-arbiter id file.");
+ }
+ if (dict)
+ dict_unref(dict);
+ if (fd)
+ fd_unref(fd);
+ loc_wipe(&loc);
+
+ return 0;
+}
- return 0;
+static int
+afr_ta_id_file_check_cbk(int ret, call_frame_t *ta_frame, void *opaque)
+{
+ return 0;
}
+static void
+afr_discover_done(call_frame_t *frame, xlator_t *this)
+{
+ int ret = 0;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ if (!priv->thin_arbiter_count)
+ goto unwind;
+ if (!gf_uuid_is_null(priv->ta_gfid))
+ goto unwind;
+
+ ret = synctask_new(this->ctx->env, afr_ta_id_file_check,
+ afr_ta_id_file_check_cbk, NULL, this);
+ if (ret)
+ goto unwind;
+unwind:
+ afr_discover_unwind(frame, this);
+}
int
-afr_discover_do (call_frame_t *frame, xlator_t *this, int err)
+afr_discover_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, inode_t *inode, struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent)
{
- int ret = 0;
- int i = 0;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
-
- local = frame->local;
- priv = this->private;
-
- if (err) {
- local->op_errno = -err;
- ret = -1;
- goto out;
- }
-
- call_count = local->call_count = AFR_COUNT (local->child_up,
- priv->child_count);
+ afr_local_t *local = NULL;
+ int call_count = -1;
+ int child_index = -1;
+ GF_UNUSED int ret = 0;
+ int8_t need_heal = 1;
+
+ child_index = (long)cookie;
+
+ local = frame->local;
+
+ local->replies[child_index].valid = 1;
+ local->replies[child_index].op_ret = op_ret;
+ local->replies[child_index].op_errno = op_errno;
+ if (op_ret != -1) {
+ local->replies[child_index].poststat = *buf;
+ local->replies[child_index].postparent = *postparent;
+ if (xdata)
+ local->replies[child_index].xdata = dict_ref(xdata);
+ }
+
+ if (local->do_discovery && (op_ret == 0))
+ afr_attempt_local_discovery(this, child_index);
+
+ if (xdata) {
+ ret = dict_get_int8(xdata, "link-count", &need_heal);
+ local->replies[child_index].need_heal = need_heal;
+ } else {
+ local->replies[child_index].need_heal = need_heal;
+ }
+
+ call_count = afr_frame_return(frame);
+ if (call_count == 0) {
+ afr_set_need_heal(this, local);
+ afr_lookup_metadata_heal_check(frame, this);
+ }
- ret = afr_lookup_xattr_req_prepare (local, this, local->xattr_req,
- &local->loc);
- if (ret) {
- local->op_errno = -ret;
- ret = -1;
- goto out;
- }
+ return 0;
+}
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_discover_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->lookup,
- &local->loc, local->xattr_req);
- if (!--call_count)
- break;
- }
+int
+afr_discover_do(call_frame_t *frame, xlator_t *this, int err)
+{
+ int ret = 0;
+ int i = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ if (err) {
+ local->op_errno = err;
+ goto out;
+ }
+
+ call_count = local->call_count = AFR_COUNT(local->child_up,
+ priv->child_count);
+
+ ret = afr_lookup_xattr_req_prepare(local, this, local->xattr_req,
+ &local->loc);
+ if (ret) {
+ local->op_errno = -ret;
+ goto out;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE(
+ frame, afr_discover_cbk, (void *)(long)i, priv->children[i],
+ priv->children[i]->fops->lookup, &local->loc, local->xattr_req);
+ if (!--call_count)
+ break;
}
+ }
- return 0;
+ return 0;
out:
- AFR_STACK_UNWIND (lookup, frame, -1, local->op_errno, 0, 0, 0, 0);
- return 0;
+ AFR_STACK_UNWIND(lookup, frame, -1, local->op_errno, 0, 0, 0, 0);
+ return 0;
}
-
int
-afr_discover (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
+afr_discover(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
{
- int op_errno = ENOMEM;
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int event = 0;
+ int op_errno = ENOMEM;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int event = 0;
- priv = this->private;
+ priv = this->private;
- local = AFR_FRAME_INIT (frame, op_errno);
- if (!local)
- goto out;
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
- if (!local->call_count) {
- op_errno = ENOTCONN;
- goto out;
- }
+ if (!local->call_count) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
- if (__is_root_gfid (loc->inode->gfid)) {
- if (!this->itable)
- this->itable = loc->inode->table;
- if (!priv->root_inode)
- priv->root_inode = inode_ref (loc->inode);
+ if (__is_root_gfid(loc->inode->gfid)) {
+ if (!priv->root_inode)
+ priv->root_inode = inode_ref(loc->inode);
- if (priv->choose_local && !priv->did_discovery) {
- /* Logic to detect which subvolumes of AFR are
- local, in order to prefer them for reads
- */
- local->do_discovery = _gf_true;
- priv->did_discovery = _gf_true;
- }
- }
+ if (priv->choose_local && !priv->did_discovery) {
+ /* Logic to detect which subvolumes of AFR are
+ local, in order to prefer them for reads
+ */
+ local->do_discovery = _gf_true;
+ priv->did_discovery = _gf_true;
+ }
+ }
- local->op = GF_FOP_LOOKUP;
+ local->op = GF_FOP_LOOKUP;
- loc_copy (&local->loc, loc);
+ loc_copy(&local->loc, loc);
- local->inode = inode_ref (loc->inode);
+ local->inode = inode_ref(loc->inode);
- if (xattr_req)
- /* If xattr_req was null, afr_lookup_xattr_req_prepare() will
- allocate one for us */
- local->xattr_req = dict_ref (xattr_req);
+ if (xattr_req) {
+ /* If xattr_req was null, afr_lookup_xattr_req_prepare() will
+ allocate one for us */
+ local->xattr_req = dict_copy_with_ref(xattr_req, NULL);
+ if (!local->xattr_req) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+ }
- if (gf_uuid_is_null (loc->inode->gfid)) {
- afr_discover_do (frame, this, 0);
- return 0;
- }
+ if (gf_uuid_is_null(loc->inode->gfid)) {
+ afr_discover_do(frame, this, 0);
+ return 0;
+ }
- afr_read_subvol_get (loc->inode, this, NULL, &event,
- AFR_DATA_TRANSACTION);
+ afr_read_subvol_get(loc->inode, this, NULL, NULL, &event,
+ AFR_DATA_TRANSACTION, NULL);
- if (event != local->event_generation)
- afr_inode_refresh (frame, this, loc->inode, afr_discover_do);
- else
- afr_discover_do (frame, this, 0);
+ afr_discover_do(frame, this, 0);
- return 0;
+ return 0;
out:
- AFR_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
- return 0;
+ AFR_STACK_UNWIND(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ return 0;
}
-
int
-afr_lookup_do (call_frame_t *frame, xlator_t *this, int err)
+afr_lookup_do(call_frame_t *frame, xlator_t *this, int err)
{
- int ret = 0;
- int i = 0;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
-
- local = frame->local;
- priv = this->private;
-
- if (err < 0) {
- local->op_errno = -err;
- ret = -1;
- goto out;
- }
-
- call_count = local->call_count = AFR_COUNT (local->child_up,
- priv->child_count);
-
- ret = afr_lookup_xattr_req_prepare (local, this, local->xattr_req,
- &local->loc);
- if (ret) {
- local->op_errno = -ret;
- ret = -1;
- goto out;
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_lookup_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->lookup,
- &local->loc, local->xattr_req);
- if (!--call_count)
- break;
- }
+ int ret = 0;
+ int i = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ if (err < 0) {
+ local->op_errno = err;
+ goto out;
+ }
+
+ call_count = local->call_count = AFR_COUNT(local->child_up,
+ priv->child_count);
+
+ ret = afr_lookup_xattr_req_prepare(local, this, local->xattr_req,
+ &local->loc);
+ if (ret) {
+ local->op_errno = -ret;
+ goto out;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE(
+ frame, afr_lookup_cbk, (void *)(long)i, priv->children[i],
+ priv->children[i]->fops->lookup, &local->loc, local->xattr_req);
+ if (!--call_count)
+ break;
}
- return 0;
+ }
+ return 0;
out:
- AFR_STACK_UNWIND (lookup, frame, -1, local->op_errno, 0, 0, 0, 0);
- return 0;
+ AFR_STACK_UNWIND(lookup, frame, -1, local->op_errno, 0, 0, 0, 0);
+ return 0;
}
/*
@@ -2118,2635 +4000,3879 @@ out:
*/
int
-afr_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
-{
- afr_local_t *local = NULL;
- int32_t op_errno = 0;
- int event = 0;
- void *gfid_req = NULL;
- int ret = 0;
-
- if (!loc->parent && gf_uuid_is_null (loc->pargfid)) {
- if (xattr_req)
- dict_del (xattr_req, "gfid-req");
- afr_discover (frame, this, loc, xattr_req);
- return 0;
- }
-
- if (__is_root_gfid (loc->parent->gfid)) {
- if (!strcmp (loc->name, GF_REPLICATE_TRASH_DIR)) {
- op_errno = EPERM;
- goto out;
- }
- }
-
- local = AFR_FRAME_INIT (frame, op_errno);
- if (!local)
- goto out;
-
- if (!local->call_count) {
- op_errno = ENOTCONN;
- goto out;
- }
+afr_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
+{
+ afr_local_t *local = NULL;
+ int32_t op_errno = 0;
+ int event = 0;
+ int ret = 0;
+
+ if (loc_is_nameless(loc)) {
+ if (xattr_req)
+ dict_del_sizen(xattr_req, "gfid-req");
+ afr_discover(frame, this, loc, xattr_req);
+ return 0;
+ }
- local->op = GF_FOP_LOOKUP;
+ if (afr_is_private_directory(this->private, loc->parent->gfid, loc->name,
+ frame->root->pid)) {
+ op_errno = EPERM;
+ goto out;
+ }
- loc_copy (&local->loc, loc);
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
- local->inode = inode_ref (loc->inode);
+ if (!local->call_count) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
- if (xattr_req) {
- /* If xattr_req was null, afr_lookup_xattr_req_prepare() will
- allocate one for us */
- ret = dict_get_ptr (xattr_req, "gfid-req", &gfid_req);
- if (ret == 0) {
- gf_uuid_copy (local->cont.lookup.gfid_req, gfid_req);
- dict_del (xattr_req, "gfid-req");
- }
- local->xattr_req = dict_ref (xattr_req);
+ local->op = GF_FOP_LOOKUP;
+
+ loc_copy(&local->loc, loc);
+
+ local->inode = inode_ref(loc->inode);
+
+ if (xattr_req) {
+ /* If xattr_req was null, afr_lookup_xattr_req_prepare() will
+ allocate one for us */
+ local->xattr_req = dict_copy_with_ref(xattr_req, NULL);
+ if (!local->xattr_req) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+ ret = dict_get_gfuuid(local->xattr_req, "gfid-req",
+ &local->cont.lookup.gfid_req);
+ if (ret == 0) {
+ dict_del_sizen(local->xattr_req, "gfid-req");
}
+ }
- afr_read_subvol_get (loc->parent, this, NULL, &event,
- AFR_DATA_TRANSACTION);
+ afr_read_subvol_get(loc->parent, this, NULL, NULL, &event,
+ AFR_DATA_TRANSACTION, NULL);
- if (event != local->event_generation)
- afr_inode_refresh (frame, this, loc->parent, afr_lookup_do);
- else
- afr_lookup_do (frame, this, 0);
+ afr_lookup_do(frame, this, 0);
- return 0;
+ return 0;
out:
- AFR_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ AFR_STACK_UNWIND(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
- return 0;
+ return 0;
}
+void
+_afr_cleanup_fd_ctx(xlator_t *this, afr_fd_ctx_t *fd_ctx)
+{
+ afr_private_t *priv = this->private;
-/* {{{ open */
+ if (fd_ctx->lk_heal_info) {
+ LOCK(&priv->lock);
+ {
+ list_del(&fd_ctx->lk_heal_info->pos);
+ }
+ afr_lk_heal_info_cleanup(fd_ctx->lk_heal_info);
+ fd_ctx->lk_heal_info = NULL;
+ }
+ GF_FREE(fd_ctx->opened_on);
+ GF_FREE(fd_ctx);
+ return;
+}
-afr_fd_ctx_t *
-__afr_fd_ctx_get (fd_t *fd, xlator_t *this)
+int
+afr_cleanup_fd_ctx(xlator_t *this, fd_t *fd)
{
- uint64_t ctx = 0;
- int ret = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
+ uint64_t ctx = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int ret = 0;
- ret = __fd_ctx_get (fd, this, &ctx);
+ ret = fd_ctx_get(fd, this, &ctx);
+ if (ret < 0)
+ goto out;
- if (ret < 0) {
- ret = __afr_fd_ctx_set (this, fd);
- if (ret < 0)
- goto out;
+ fd_ctx = (afr_fd_ctx_t *)(long)ctx;
- ret = __fd_ctx_get (fd, this, &ctx);
- if (ret < 0)
- goto out;
- }
+ if (fd_ctx) {
+ _afr_cleanup_fd_ctx(this, fd_ctx);
+ }
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
out:
- return fd_ctx;
+ return 0;
}
-
-afr_fd_ctx_t *
-afr_fd_ctx_get (fd_t *fd, xlator_t *this)
+int
+afr_release(xlator_t *this, fd_t *fd)
{
- afr_fd_ctx_t *fd_ctx = NULL;
-
- LOCK(&fd->lock);
- {
- fd_ctx = __afr_fd_ctx_get (fd, this);
- }
- UNLOCK(&fd->lock);
+ afr_cleanup_fd_ctx(this, fd);
- return fd_ctx;
+ return 0;
}
-
-int
-__afr_fd_ctx_set (xlator_t *this, fd_t *fd)
+afr_fd_ctx_t *
+__afr_fd_ctx_get(fd_t *fd, xlator_t *this)
{
- afr_private_t * priv = NULL;
- int ret = -1;
- uint64_t ctx = 0;
- afr_fd_ctx_t * fd_ctx = NULL;
- int i = 0;
+ uint64_t ctx = 0;
+ int ret = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
- VALIDATE_OR_GOTO (this->private, out);
- VALIDATE_OR_GOTO (fd, out);
+ ret = __fd_ctx_get(fd, this, &ctx);
- priv = this->private;
+ if (ret < 0) {
+ ret = __afr_fd_ctx_set(this, fd);
+ if (ret < 0)
+ goto out;
- ret = __fd_ctx_get (fd, this, &ctx);
+ ret = __fd_ctx_get(fd, this, &ctx);
+ if (ret < 0)
+ goto out;
+ }
- if (ret == 0)
- goto out;
+ fd_ctx = (afr_fd_ctx_t *)(long)ctx;
+out:
+ return fd_ctx;
+}
- fd_ctx = GF_CALLOC (1, sizeof (afr_fd_ctx_t),
- gf_afr_mt_afr_fd_ctx_t);
- if (!fd_ctx) {
- ret = -ENOMEM;
- goto out;
- }
+afr_fd_ctx_t *
+afr_fd_ctx_get(fd_t *fd, xlator_t *this)
+{
+ afr_fd_ctx_t *fd_ctx = NULL;
- for (i = 0; i < AFR_NUM_CHANGE_LOGS; i++) {
- fd_ctx->pre_op_done[i] = GF_CALLOC (sizeof (*fd_ctx->pre_op_done[i]),
- priv->child_count,
- gf_afr_mt_int32_t);
- if (!fd_ctx->pre_op_done[i]) {
- ret = -ENOMEM;
- goto out;
- }
- }
-
- fd_ctx->opened_on = GF_CALLOC (sizeof (*fd_ctx->opened_on),
- priv->child_count,
- gf_afr_mt_int32_t);
- if (!fd_ctx->opened_on) {
- ret = -ENOMEM;
- goto out;
- }
+ LOCK(&fd->lock);
+ {
+ fd_ctx = __afr_fd_ctx_get(fd, this);
+ }
+ UNLOCK(&fd->lock);
- for (i = 0; i < priv->child_count; i++) {
- if (fd_is_anonymous (fd))
- fd_ctx->opened_on[i] = AFR_FD_OPENED;
- else
- fd_ctx->opened_on[i] = AFR_FD_NOT_OPENED;
- }
+ return fd_ctx;
+}
- fd_ctx->lock_piggyback = GF_CALLOC (sizeof (*fd_ctx->lock_piggyback),
- priv->child_count,
- gf_afr_mt_char);
- if (!fd_ctx->lock_piggyback) {
- ret = -ENOMEM;
- goto out;
- }
+int
+__afr_fd_ctx_set(xlator_t *this, fd_t *fd)
+{
+ afr_private_t *priv = NULL;
+ int ret = -1;
+ uint64_t ctx = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int i = 0;
- fd_ctx->lock_acquired = GF_CALLOC (sizeof (*fd_ctx->lock_acquired),
- priv->child_count,
- gf_afr_mt_char);
- if (!fd_ctx->lock_acquired) {
- ret = -ENOMEM;
- goto out;
- }
+ VALIDATE_OR_GOTO(this->private, out);
+ VALIDATE_OR_GOTO(fd, out);
- fd_ctx->readdir_subvol = -1;
+ priv = this->private;
- pthread_mutex_init (&fd_ctx->delay_lock, NULL);
+ ret = __fd_ctx_get(fd, this, &ctx);
- INIT_LIST_HEAD (&fd_ctx->eager_locked);
+ if (ret == 0)
+ goto out;
- ret = __fd_ctx_set (fd, this, (uint64_t)(long) fd_ctx);
- if (ret)
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to set fd ctx (%p)", fd);
-out:
- return ret;
-}
+ fd_ctx = GF_CALLOC(1, sizeof(afr_fd_ctx_t), gf_afr_mt_afr_fd_ctx_t);
+ if (!fd_ctx) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ fd_ctx->opened_on = GF_CALLOC(sizeof(*fd_ctx->opened_on), priv->child_count,
+ gf_afr_mt_int32_t);
+ if (!fd_ctx->opened_on) {
+ ret = -ENOMEM;
+ goto out;
+ }
-int
-afr_fd_ctx_set (xlator_t *this, fd_t *fd)
-{
- int ret = -1;
+ for (i = 0; i < priv->child_count; i++) {
+ if (fd_is_anonymous(fd))
+ fd_ctx->opened_on[i] = AFR_FD_OPENED;
+ else
+ fd_ctx->opened_on[i] = AFR_FD_NOT_OPENED;
+ }
- LOCK (&fd->lock);
- {
- ret = __afr_fd_ctx_set (this, fd);
- }
- UNLOCK (&fd->lock);
+ fd_ctx->readdir_subvol = -1;
+ fd_ctx->lk_heal_info = NULL;
- return ret;
+ ret = __fd_ctx_set(fd, this, (uint64_t)(long)fd_ctx);
+ if (ret)
+ gf_msg_debug(this->name, 0, "failed to set fd ctx (%p)", fd);
+out:
+ if (ret && fd_ctx)
+ _afr_cleanup_fd_ctx(this, fd_ctx);
+ return ret;
}
/* {{{ flush */
int
-afr_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+afr_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
{
- afr_local_t *local = NULL;
- int call_count = -1;
+ afr_local_t *local = NULL;
+ int call_count = -1;
- local = frame->local;
+ local = frame->local;
- LOCK (&frame->lock);
- {
- if (op_ret != -1) {
- local->op_ret = op_ret;
- if (!local->xdata_rsp && xdata)
- local->xdata_rsp = dict_ref (xdata);
- } else {
- local->op_errno = op_errno;
- }
+ LOCK(&frame->lock);
+ {
+ if (op_ret != -1) {
+ local->op_ret = op_ret;
+ if (!local->xdata_rsp && xdata)
+ local->xdata_rsp = dict_ref(xdata);
+ } else {
+ local->op_errno = op_errno;
}
- UNLOCK (&frame->lock);
+ call_count = --local->call_count;
+ }
+ UNLOCK(&frame->lock);
- call_count = afr_frame_return (frame);
+ if (call_count == 0)
+ AFR_STACK_UNWIND(flush, frame, local->op_ret, local->op_errno,
+ local->xdata_rsp);
- if (call_count == 0)
- AFR_STACK_UNWIND (flush, frame, local->op_ret,
- local->op_errno, local->xdata_rsp);
-
- return 0;
+ return 0;
}
static int
-afr_flush_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+afr_flush_wrapper(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- int i = 0;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
-
- priv = this->private;
- local = frame->local;
- call_count = local->call_count;
+ int i = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+
+ priv = this->private;
+ local = frame->local;
+ call_count = local->call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE(frame, afr_flush_cbk, (void *)(long)i,
+ priv->children[i], priv->children[i]->fops->flush,
+ local->fd, xdata);
+ if (!--call_count)
+ break;
+ }
+ }
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_flush_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->flush,
- local->fd, xdata);
- if (!--call_count)
- break;
+ return 0;
+}
- }
+afr_local_t *
+afr_wakeup_same_fd_delayed_op(xlator_t *this, afr_lock_t *lock, fd_t *fd)
+{
+ afr_local_t *local = NULL;
+
+ if (lock->delay_timer) {
+ local = list_entry(lock->post_op.next, afr_local_t,
+ transaction.owner_list);
+ if (fd == local->fd) {
+ if (gf_timer_call_cancel(this->ctx, lock->delay_timer)) {
+ local = NULL;
+ } else {
+ lock->delay_timer = NULL;
+ }
+ } else {
+ local = NULL;
}
+ }
- return 0;
+ return local;
+}
+
+void
+afr_delayed_changelog_wake_resume(xlator_t *this, inode_t *inode,
+ call_stub_t *stub)
+{
+ afr_inode_ctx_t *ctx = NULL;
+ afr_lock_t *lock = NULL;
+ afr_local_t *metadata_local = NULL;
+ afr_local_t *data_local = NULL;
+ LOCK(&inode->lock);
+ {
+ (void)__afr_inode_ctx_get(this, inode, &ctx);
+ lock = &ctx->lock[AFR_DATA_TRANSACTION];
+ data_local = afr_wakeup_same_fd_delayed_op(this, lock, stub->args.fd);
+ lock = &ctx->lock[AFR_METADATA_TRANSACTION];
+ metadata_local = afr_wakeup_same_fd_delayed_op(this, lock,
+ stub->args.fd);
+ }
+ UNLOCK(&inode->lock);
+
+ if (data_local) {
+ data_local->transaction.resume_stub = stub;
+ } else if (metadata_local) {
+ metadata_local->transaction.resume_stub = stub;
+ } else {
+ call_resume(stub);
+ }
+ if (data_local) {
+ afr_delayed_changelog_wake_up_cbk(data_local);
+ }
+ if (metadata_local) {
+ afr_delayed_changelog_wake_up_cbk(metadata_local);
+ }
}
int
-afr_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+afr_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- afr_local_t *local = NULL;
- call_stub_t *stub = NULL;
- int op_errno = ENOMEM;
+ afr_local_t *local = NULL;
+ call_stub_t *stub = NULL;
+ int op_errno = ENOMEM;
- local = AFR_FRAME_INIT (frame, op_errno);
- if (!local)
- goto out;
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
- if (!local->call_count) {
- op_errno = ENOTCONN;
- goto out;
- }
+ local->op = GF_FOP_FLUSH;
+ if (!afr_is_consistent_io_possible(local, this->private, &op_errno))
+ goto out;
- local->fd = fd_ref(fd);
+ local->fd = fd_ref(fd);
- stub = fop_flush_stub (frame, afr_flush_wrapper, fd, xdata);
- if (!stub)
- goto out;
+ stub = fop_flush_stub(frame, afr_flush_wrapper, fd, xdata);
+ if (!stub)
+ goto out;
- afr_delayed_changelog_wake_resume (this, fd, stub);
+ afr_delayed_changelog_wake_resume(this, fd->inode, stub);
- return 0;
+ return 0;
out:
- AFR_STACK_UNWIND (flush, frame, -1, op_errno, NULL);
- return 0;
+ AFR_STACK_UNWIND(flush, frame, -1, op_errno, NULL);
+ return 0;
}
-/* }}} */
-
-
int
-afr_cleanup_fd_ctx (xlator_t *this, fd_t *fd)
+afr_fsyncdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
- int ret = 0;
- int i = 0;
-
- ret = fd_ctx_get (fd, this, &ctx);
- if (ret < 0)
- goto out;
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- if (fd_ctx) {
- //no need to take any locks
- if (!list_empty (&fd_ctx->eager_locked))
- gf_log (this->name, GF_LOG_WARNING, "%s: Stale "
- "Eager-lock stubs found",
- uuid_utoa (fd->inode->gfid));
-
- for (i = 0; i < AFR_NUM_CHANGE_LOGS; i++)
- GF_FREE (fd_ctx->pre_op_done[i]);
+ afr_local_t *local = NULL;
+ int call_count = -1;
- GF_FREE (fd_ctx->opened_on);
+ local = frame->local;
- GF_FREE (fd_ctx->lock_piggyback);
-
- GF_FREE (fd_ctx->lock_acquired);
-
- pthread_mutex_destroy (&fd_ctx->delay_lock);
-
- GF_FREE (fd_ctx);
+ LOCK(&frame->lock);
+ {
+ if (op_ret == 0) {
+ local->op_ret = 0;
+ if (!local->xdata_rsp && xdata)
+ local->xdata_rsp = dict_ref(xdata);
+ } else {
+ local->op_errno = op_errno;
}
+ call_count = --local->call_count;
+ }
+ UNLOCK(&frame->lock);
-out:
- return 0;
-}
+ if (call_count == 0)
+ AFR_STACK_UNWIND(fsyncdir, frame, local->op_ret, local->op_errno,
+ local->xdata_rsp);
+ return 0;
+}
int
-afr_release (xlator_t *this, fd_t *fd)
+afr_fsyncdir(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+ dict_t *xdata)
{
- afr_cleanup_fd_ctx (this, fd);
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int i = 0;
+ int32_t call_count = 0;
+ int32_t op_errno = ENOMEM;
+
+ priv = this->private;
+
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
+
+ local->op = GF_FOP_FSYNCDIR;
+ if (!afr_is_consistent_io_possible(local, priv, &op_errno))
+ goto out;
+
+ call_count = local->call_count;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND(frame, afr_fsyncdir_cbk, priv->children[i],
+ priv->children[i]->fops->fsyncdir, fd, datasync, xdata);
+ if (!--call_count)
+ break;
+ }
+ }
- return 0;
+ return 0;
+out:
+ AFR_STACK_UNWIND(fsyncdir, frame, -1, op_errno, NULL);
+
+ return 0;
}
+/* }}} */
-/* {{{ fsync */
+static int
+afr_serialized_lock_wind(call_frame_t *frame, xlator_t *this);
-int
-afr_fsync_unwind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+static gf_boolean_t
+afr_is_conflicting_lock_present(int32_t op_ret, int32_t op_errno)
{
- AFR_STACK_UNWIND (fsync, frame, op_ret, op_errno, prebuf, postbuf,
- xdata);
- return 0;
+ if (op_ret == -1 && op_errno == EAGAIN)
+ return _gf_true;
+ return _gf_false;
}
-int
-afr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+static void
+afr_fop_lock_unwind(call_frame_t *frame, glusterfs_fop_t op, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
{
- afr_local_t *local = NULL;
- int call_count = -1;
- int child_index = (long) cookie;
- int read_subvol = 0;
- call_stub_t *stub = NULL;
+ switch (op) {
+ case GF_FOP_INODELK:
+ AFR_STACK_UNWIND(inodelk, frame, op_ret, op_errno, xdata);
+ break;
+ case GF_FOP_FINODELK:
+ AFR_STACK_UNWIND(finodelk, frame, op_ret, op_errno, xdata);
+ break;
+ case GF_FOP_ENTRYLK:
+ AFR_STACK_UNWIND(entrylk, frame, op_ret, op_errno, xdata);
+ break;
+ case GF_FOP_FENTRYLK:
+ AFR_STACK_UNWIND(fentrylk, frame, op_ret, op_errno, xdata);
+ break;
+ default:
+ break;
+ }
+}
- local = frame->local;
+static void
+afr_fop_lock_wind(call_frame_t *frame, xlator_t *this, int child_index,
+ int32_t (*lock_cbk)(call_frame_t *, void *, xlator_t *,
+ int32_t, int32_t, dict_t *))
+{
+ afr_local_t *local = frame->local;
+ afr_private_t *priv = this->private;
+ int i = child_index;
+
+ switch (local->op) {
+ case GF_FOP_INODELK:
+ STACK_WIND_COOKIE(
+ frame, lock_cbk, (void *)(long)i, priv->children[i],
+ priv->children[i]->fops->inodelk,
+ (const char *)local->cont.inodelk.volume, &local->loc,
+ local->cont.inodelk.cmd, &local->cont.inodelk.flock,
+ local->cont.inodelk.xdata);
+ break;
+ case GF_FOP_FINODELK:
+ STACK_WIND_COOKIE(
+ frame, lock_cbk, (void *)(long)i, priv->children[i],
+ priv->children[i]->fops->finodelk,
+ (const char *)local->cont.inodelk.volume, local->fd,
+ local->cont.inodelk.cmd, &local->cont.inodelk.flock,
+ local->cont.inodelk.xdata);
+ break;
+ case GF_FOP_ENTRYLK:
+ STACK_WIND_COOKIE(
+ frame, lock_cbk, (void *)(long)i, priv->children[i],
+ priv->children[i]->fops->entrylk, local->cont.entrylk.volume,
+ &local->loc, local->cont.entrylk.basename,
+ local->cont.entrylk.cmd, local->cont.entrylk.type,
+ local->cont.entrylk.xdata);
+ break;
+ case GF_FOP_FENTRYLK:
+ STACK_WIND_COOKIE(
+ frame, lock_cbk, (void *)(long)i, priv->children[i],
+ priv->children[i]->fops->fentrylk, local->cont.entrylk.volume,
+ local->fd, local->cont.entrylk.basename,
+ local->cont.entrylk.cmd, local->cont.entrylk.type,
+ local->cont.entrylk.xdata);
+ break;
+ default:
+ break;
+ }
+}
- read_subvol = afr_data_subvol_get (local->inode, this, 0, 0);
+void
+afr_fop_lock_proceed(call_frame_t *frame)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- LOCK (&frame->lock);
- {
- if (op_ret == 0) {
- if (local->op_ret == -1) {
- local->op_ret = 0;
-
- local->cont.inode_wfop.prebuf = *prebuf;
- local->cont.inode_wfop.postbuf = *postbuf;
-
- if (xdata)
- local->xdata_rsp = dict_ref (xdata);
- }
-
- if (child_index == read_subvol) {
- local->cont.inode_wfop.prebuf = *prebuf;
- local->cont.inode_wfop.postbuf = *postbuf;
- if (xdata) {
- if (local->xdata_rsp)
- dict_unref (local->xdata_rsp);
- local->xdata_rsp = dict_ref (xdata);
- }
- }
- } else {
- local->op_errno = op_errno;
- }
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- /* Make a stub out of the frame, and register it
- with the waking up post-op. When the call-stub resumes,
- we are guaranteed that there was no post-op pending
- (i.e changelogs were unset in the server). This is an
- essential "guarantee", that fsync() returns only after
- completely finishing EVERYTHING, including the delayed
- post-op. This guarantee is expected by FUSE graph switching
- for example.
- */
- stub = fop_fsync_cbk_stub (frame, afr_fsync_unwind_cbk,
- local->op_ret, local->op_errno,
- &local->cont.inode_wfop.prebuf,
- &local->cont.inode_wfop.postbuf,
- local->xdata_rsp);
- if (!stub) {
- AFR_STACK_UNWIND (fsync, frame, -1, ENOMEM, 0, 0, 0);
- return 0;
- }
-
- /* If no new unstable writes happened between the
- time we cleared the unstable write witness flag in afr_fsync
- and now, calling afr_delayed_changelog_wake_up() should
- wake up and skip over the fsync phase and go straight to
- afr_changelog_post_op_now()
- */
- afr_delayed_changelog_wake_resume (this, local->fd, stub);
- }
+ local = frame->local;
+ priv = frame->this->private;
- return 0;
+ if (local->fop_lock_state != AFR_FOP_LOCK_PARALLEL) {
+ afr_fop_lock_unwind(frame, local->op, local->op_ret, local->op_errno,
+ local->xdata_rsp);
+ return;
+ }
+ /* At least one child is up */
+ /*
+ * Non-blocking locks also need to be serialized. Otherwise there is
+ * a chance that both the mounts which issued same non-blocking inodelk
+ * may endup not acquiring the lock on any-brick.
+ * Ex: Mount1 and Mount2
+ * request for full length lock on file f1. Mount1 afr may acquire the
+ * partial lock on brick-1 and may not acquire the lock on brick-2
+ * because Mount2 already got the lock on brick-2, vice versa. Since
+ * both the mounts only got partial locks, afr treats them as failure in
+ * gaining the locks and unwinds with EAGAIN errno.
+ */
+ local->op_ret = -1;
+ local->op_errno = EUCLEAN;
+ local->fop_lock_state = AFR_FOP_LOCK_SERIAL;
+ afr_local_replies_wipe(local, priv);
+ if (local->xdata_rsp)
+ dict_unref(local->xdata_rsp);
+ local->xdata_rsp = NULL;
+ switch (local->op) {
+ case GF_FOP_INODELK:
+ case GF_FOP_FINODELK:
+ local->cont.inodelk.cmd = local->cont.inodelk.in_cmd;
+ local->cont.inodelk.flock = local->cont.inodelk.in_flock;
+ if (local->cont.inodelk.xdata)
+ dict_unref(local->cont.inodelk.xdata);
+ local->cont.inodelk.xdata = NULL;
+ if (local->xdata_req)
+ local->cont.inodelk.xdata = dict_ref(local->xdata_req);
+ break;
+ case GF_FOP_ENTRYLK:
+ case GF_FOP_FENTRYLK:
+ local->cont.entrylk.cmd = local->cont.entrylk.in_cmd;
+ if (local->cont.entrylk.xdata)
+ dict_unref(local->cont.entrylk.xdata);
+ local->cont.entrylk.xdata = NULL;
+ if (local->xdata_req)
+ local->cont.entrylk.xdata = dict_ref(local->xdata_req);
+ break;
+ default:
+ break;
+ }
+ afr_serialized_lock_wind(frame, frame->this);
}
+static int32_t
+afr_unlock_partial_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
-int
-afr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
- dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int i = 0;
- int32_t call_count = 0;
- int32_t op_errno = ENOMEM;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int child_index = (long)cookie;
+ uuid_t gfid = {0};
- priv = this->private;
+ local = frame->local;
+ priv = this->private;
- local = AFR_FRAME_INIT (frame, op_errno);
- if (!local)
- goto out;
+ if (op_ret < 0 && op_errno != ENOTCONN) {
+ if (local->fd)
+ gf_uuid_copy(gfid, local->fd->inode->gfid);
+ else
+ loc_gfid(&local->loc, gfid);
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, AFR_MSG_UNLOCK_FAIL,
+ "%s: Failed to unlock %s on %s "
+ "with lk_owner: %s",
+ uuid_utoa(gfid), gf_fop_list[local->op],
+ priv->children[child_index]->name,
+ lkowner_utoa(&frame->root->lk_owner));
+ }
+
+ call_count = afr_frame_return(frame);
+ if (call_count == 0)
+ afr_fop_lock_proceed(frame);
+
+ return 0;
+}
- call_count = local->call_count;
- if (!call_count) {
- op_errno = ENOTCONN;
- goto out;
- }
+static int32_t
+afr_unlock_locks_and_proceed(call_frame_t *frame, xlator_t *this,
+ int call_count)
+{
+ int i = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+
+ if (call_count == 0) {
+ afr_fop_lock_proceed(frame);
+ goto out;
+ }
+
+ local = frame->local;
+ priv = this->private;
+ local->call_count = call_count;
+ switch (local->op) {
+ case GF_FOP_INODELK:
+ case GF_FOP_FINODELK:
+ local->cont.inodelk.flock.l_type = F_UNLCK;
+ local->cont.inodelk.cmd = F_SETLK;
+ if (local->cont.inodelk.xdata)
+ dict_unref(local->cont.inodelk.xdata);
+ local->cont.inodelk.xdata = NULL;
+ break;
+ case GF_FOP_ENTRYLK:
+ case GF_FOP_FENTRYLK:
+ local->cont.entrylk.cmd = ENTRYLK_UNLOCK;
+ if (local->cont.entrylk.xdata)
+ dict_unref(local->cont.entrylk.xdata);
+ local->cont.entrylk.xdata = NULL;
+ break;
+ default:
+ break;
+ }
- local->fd = fd_ref (fd);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
- if (afr_fd_has_witnessed_unstable_write (this, fd)) {
- /* don't care. we only wanted to CLEAR the bit */
- }
+ if (local->replies[i].op_ret == -1)
+ continue;
- local->inode = inode_ref (fd->inode);
+ afr_fop_lock_wind(frame, this, i, afr_unlock_partial_lock_cbk);
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_fsync_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fsync,
- fd, datasync, xdata);
- if (!--call_count)
- break;
- }
- }
+ if (!--call_count)
+ break;
+ }
- return 0;
out:
- AFR_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL, NULL);
-
- return 0;
+ return 0;
}
-/* }}} */
+int32_t
+afr_fop_lock_done(call_frame_t *frame, xlator_t *this)
+{
+ int i = 0;
+ int lock_count = 0;
+ unsigned char *success = NULL;
-/* {{{ fsync */
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
-int
-afr_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- afr_local_t *local = NULL;
- int call_count = -1;
+ local = frame->local;
+ priv = this->private;
+ success = alloca0(priv->child_count);
- local = frame->local;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
- LOCK (&frame->lock);
- {
- if (op_ret == 0) {
- local->op_ret = 0;
- if (!local->xdata_rsp && xdata)
- local->xdata_rsp = dict_ref (xdata);
- } else {
- local->op_errno = op_errno;
- }
+ if (local->replies[i].op_ret == 0) {
+ lock_count++;
+ success[i] = 1;
}
- UNLOCK (&frame->lock);
- call_count = afr_frame_return (frame);
+ if (local->op_ret == -1 && local->op_errno == EAGAIN)
+ continue;
- if (call_count == 0)
- AFR_STACK_UNWIND (fsyncdir, frame, local->op_ret,
- local->op_errno, local->xdata_rsp);
+ if ((local->replies[i].op_ret == -1) &&
+ (local->replies[i].op_errno == EAGAIN)) {
+ local->op_ret = -1;
+ local->op_errno = EAGAIN;
+ continue;
+ }
- return 0;
-}
+ if (local->replies[i].op_ret == 0)
+ local->op_ret = 0;
+ local->op_errno = local->replies[i].op_errno;
+ }
-int
-afr_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
- dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int i = 0;
- int32_t call_count = 0;
- int32_t op_errno = ENOMEM;
+ if (afr_fop_lock_is_unlock(frame))
+ goto unwind;
- priv = this->private;
+ if (afr_is_conflicting_lock_present(local->op_ret, local->op_errno)) {
+ afr_unlock_locks_and_proceed(frame, this, lock_count);
+ } else if (priv->quorum_count && !afr_has_quorum(success, this, NULL)) {
+ local->fop_lock_state = AFR_FOP_LOCK_QUORUM_FAILED;
+ local->op_ret = -1;
+ local->op_errno = afr_final_errno(local, priv);
+ if (local->op_errno == 0)
+ local->op_errno = afr_quorum_errno(priv);
+ afr_unlock_locks_and_proceed(frame, this, lock_count);
+ } else {
+ goto unwind;
+ }
+
+ return 0;
+unwind:
+ afr_fop_lock_unwind(frame, local->op, local->op_ret, local->op_errno,
+ local->xdata_rsp);
+ return 0;
+}
- local = AFR_FRAME_INIT (frame, op_errno);
- if (!local)
- goto out;
+static int
+afr_common_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int child_index = (long)cookie;
- call_count = local->call_count;
- if (!call_count) {
- op_errno = ENOTCONN;
- goto out;
- }
+ local = frame->local;
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_fsyncdir_cbk,
- priv->children[i],
- priv->children[i]->fops->fsyncdir,
- fd, datasync, xdata);
- if (!--call_count)
- break;
- }
+ local->replies[child_index].valid = 1;
+ local->replies[child_index].op_ret = op_ret;
+ local->replies[child_index].op_errno = op_errno;
+ if (op_ret == 0 && xdata) {
+ local->replies[child_index].xdata = dict_ref(xdata);
+ LOCK(&frame->lock);
+ {
+ if (!local->xdata_rsp)
+ local->xdata_rsp = dict_ref(xdata);
}
-
- return 0;
-out:
- AFR_STACK_UNWIND (fsyncdir, frame, -1, op_errno, NULL);
-
- return 0;
+ UNLOCK(&frame->lock);
+ }
+ return 0;
}
-/* }}} */
-
-/* {{{ xattrop */
+static int32_t
+afr_serialized_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
-int32_t
-afr_xattrop_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *xattr, dict_t *xdata)
{
- afr_local_t *local = NULL;
- int call_count = -1;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int child_index = (long)cookie;
+ int next_child = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ afr_common_lock_cbk(frame, cookie, this, op_ret, op_errno, xdata);
+
+ for (next_child = child_index + 1; next_child < priv->child_count;
+ next_child++) {
+ if (local->child_up[next_child])
+ break;
+ }
+
+ if (afr_is_conflicting_lock_present(op_ret, op_errno) ||
+ (next_child == priv->child_count)) {
+ afr_fop_lock_done(frame, this);
+ } else {
+ afr_fop_lock_wind(frame, this, next_child, afr_serialized_lock_cbk);
+ }
+
+ return 0;
+}
- local = frame->local;
+static int
+afr_serialized_lock_wind(call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int i = 0;
- LOCK (&frame->lock);
- {
- if (op_ret == 0) {
- if (!local->cont.xattrop.xattr)
- local->cont.xattrop.xattr = dict_ref (xattr);
+ priv = this->private;
+ local = frame->local;
- if (!local->xdata_rsp && xdata)
- local->xdata_rsp = dict_ref (xdata);
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ afr_fop_lock_wind(frame, this, i, afr_serialized_lock_cbk);
+ break;
+ }
+ }
+ return 0;
+}
- local->op_ret = 0;
- }
+static int32_t
+afr_parallel_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+{
+ int call_count = 0;
- call_count = afr_frame_return (frame);
+ afr_common_lock_cbk(frame, cookie, this, op_ret, op_errno, xdata);
- if (call_count == 0)
- AFR_STACK_UNWIND (xattrop, frame, local->op_ret, local->op_errno,
- local->cont.xattrop.xattr, local->xdata_rsp);
+ call_count = afr_frame_return(frame);
+ if (call_count == 0)
+ afr_fop_lock_done(frame, this);
- return 0;
+ return 0;
}
-
-int32_t
-afr_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
+static int
+afr_parallel_lock_wind(call_frame_t *frame, xlator_t *this)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int i = 0;
- int32_t call_count = 0;
- int32_t op_errno = ENOMEM;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int call_count = 0;
+ int i = 0;
+
+ priv = this->private;
+ local = frame->local;
+ call_count = local->call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->child_up[i])
+ continue;
+ afr_fop_lock_wind(frame, this, i, afr_parallel_lock_cbk);
+ if (!--call_count)
+ break;
+ }
+ return 0;
+}
- priv = this->private;
+static int
+afr_fop_handle_lock(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = frame->local;
+ int op_errno = 0;
- local = AFR_FRAME_INIT (frame, op_errno);
- if (!local)
- goto out;
+ if (!afr_fop_lock_is_unlock(frame)) {
+ if (!afr_is_consistent_io_possible(local, this->private, &op_errno))
+ goto out;
- call_count = local->call_count;
- if (!call_count) {
- op_errno = ENOTCONN;
- goto out;
- }
+ switch (local->op) {
+ case GF_FOP_INODELK:
+ case GF_FOP_FINODELK:
+ local->cont.inodelk.cmd = F_SETLK;
+ break;
+ case GF_FOP_ENTRYLK:
+ case GF_FOP_FENTRYLK:
+ local->cont.entrylk.cmd = ENTRYLK_LOCK_NB;
+ break;
+ default:
+ break;
+ }
+ }
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_xattrop_cbk,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- loc, optype, xattr, xdata);
- if (!--call_count)
- break;
- }
+ if (local->xdata_req) {
+ switch (local->op) {
+ case GF_FOP_INODELK:
+ case GF_FOP_FINODELK:
+ local->cont.inodelk.xdata = dict_ref(local->xdata_req);
+ break;
+ case GF_FOP_ENTRYLK:
+ case GF_FOP_FENTRYLK:
+ local->cont.entrylk.xdata = dict_ref(local->xdata_req);
+ break;
+ default:
+ break;
}
+ }
- return 0;
+ local->fop_lock_state = AFR_FOP_LOCK_PARALLEL;
+ afr_parallel_lock_wind(frame, this);
out:
- AFR_STACK_UNWIND (xattrop, frame, -1, op_errno, NULL, NULL);
-
- return 0;
+ return -op_errno;
}
-/* }}} */
+static int32_t
+afr_handle_inodelk(call_frame_t *frame, xlator_t *this, glusterfs_fop_t fop,
+ const char *volume, loc_t *loc, fd_t *fd, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int32_t op_errno = ENOMEM;
+
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
+
+ local->op = fop;
+ if (loc)
+ loc_copy(&local->loc, loc);
+ if (fd && (flock->l_type != F_UNLCK)) {
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ local->fd = fd_ref(fd);
+ }
+
+ local->cont.inodelk.volume = gf_strdup(volume);
+ if (!local->cont.inodelk.volume) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ local->cont.inodelk.in_cmd = cmd;
+ local->cont.inodelk.cmd = cmd;
+ local->cont.inodelk.in_flock = *flock;
+ local->cont.inodelk.flock = *flock;
+ if (xdata)
+ local->xdata_req = dict_ref(xdata);
+
+ op_errno = -afr_fop_handle_lock(frame, frame->this);
+ if (op_errno)
+ goto out;
+ return 0;
+out:
+ afr_fop_lock_unwind(frame, fop, -1, op_errno, NULL);
-/* {{{ fxattrop */
+ return 0;
+}
int32_t
-afr_fxattrop_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *xattr, dict_t *xdata)
+afr_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc,
+ int32_t cmd, struct gf_flock *flock, dict_t *xdata)
{
- afr_local_t *local = NULL;
-
- int call_count = -1;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (op_ret == 0) {
- if (!local->cont.fxattrop.xattr)
- local->cont.fxattrop.xattr = dict_ref (xattr);
-
- if (!local->xdata_rsp && xdata)
- local->xdata_rsp = dict_ref (xdata);
- local->op_ret = 0;
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ afr_handle_inodelk(frame, this, GF_FOP_INODELK, volume, loc, NULL, cmd,
+ flock, xdata);
+ return 0;
+}
- call_count = afr_frame_return (frame);
+int32_t
+afr_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+ int32_t cmd, struct gf_flock *flock, dict_t *xdata)
+{
+ afr_handle_inodelk(frame, this, GF_FOP_FINODELK, volume, NULL, fd, cmd,
+ flock, xdata);
+ return 0;
+}
- if (call_count == 0)
- AFR_STACK_UNWIND (fxattrop, frame, local->op_ret, local->op_errno,
- local->cont.fxattrop.xattr, local->xdata_rsp);
+static int
+afr_handle_entrylk(call_frame_t *frame, xlator_t *this, glusterfs_fop_t fop,
+ const char *volume, loc_t *loc, fd_t *fd,
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int32_t op_errno = ENOMEM;
+
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
+
+ local->op = fop;
+ if (loc)
+ loc_copy(&local->loc, loc);
+ if (fd && (cmd != ENTRYLK_UNLOCK)) {
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ local->fd = fd_ref(fd);
+ }
+ local->cont.entrylk.cmd = cmd;
+ local->cont.entrylk.in_cmd = cmd;
+ local->cont.entrylk.type = type;
+ local->cont.entrylk.volume = gf_strdup(volume);
+ local->cont.entrylk.basename = gf_strdup(basename);
+ if (!local->cont.entrylk.volume || !local->cont.entrylk.basename) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+ if (xdata)
+ local->xdata_req = dict_ref(xdata);
+ op_errno = -afr_fop_handle_lock(frame, frame->this);
+ if (op_errno)
+ goto out;
+
+ return 0;
+out:
+ afr_fop_lock_unwind(frame, fop, -1, op_errno, NULL);
+ return 0;
+}
- return 0;
+int
+afr_entrylk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc,
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata)
+{
+ afr_handle_entrylk(frame, this, GF_FOP_ENTRYLK, volume, loc, NULL, basename,
+ cmd, type, xdata);
+ return 0;
}
+int
+afr_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata)
+{
+ afr_handle_entrylk(frame, this, GF_FOP_FENTRYLK, volume, NULL, fd, basename,
+ cmd, type, xdata);
+ return 0;
+}
-int32_t
-afr_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
- gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
+int
+afr_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct statvfs *statvfs, dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int i = 0;
- int32_t call_count = 0;
- int32_t op_errno = 0;
+ afr_local_t *local = NULL;
+ int call_count = 0;
+ struct statvfs *buf = NULL;
- priv = this->private;
+ local = frame->local;
- local = AFR_FRAME_INIT (frame, op_errno);
- if (!local)
- goto out;
+ LOCK(&frame->lock);
+ {
+ if (op_ret != 0) {
+ local->op_errno = op_errno;
+ goto unlock;
+ }
- call_count = local->call_count;
- if (!call_count) {
- op_errno = ENOTCONN;
- goto out;
- }
+ local->op_ret = op_ret;
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_fxattrop_cbk,
- priv->children[i],
- priv->children[i]->fops->fxattrop,
- fd, optype, xattr, xdata);
- if (!--call_count)
- break;
+ buf = &local->cont.statfs.buf;
+ if (local->cont.statfs.buf_set) {
+ if (statvfs->f_bavail < buf->f_bavail) {
+ *buf = *statvfs;
+ if (xdata) {
+ if (local->xdata_rsp)
+ dict_unref(local->xdata_rsp);
+ local->xdata_rsp = dict_ref(xdata);
}
+ }
+ } else {
+ *buf = *statvfs;
+ local->cont.statfs.buf_set = 1;
+ if (xdata)
+ local->xdata_rsp = dict_ref(xdata);
}
+ }
+unlock:
+ call_count = --local->call_count;
+ UNLOCK(&frame->lock);
- return 0;
-out:
- AFR_STACK_UNWIND (fxattrop, frame, -1, op_errno, NULL, NULL);
+ if (call_count == 0)
+ AFR_STACK_UNWIND(statfs, frame, local->op_ret, local->op_errno,
+ &local->cont.statfs.buf, local->xdata_rsp);
- return 0;
+ return 0;
}
-/* }}} */
-
-int32_t
-afr_unlock_partial_inodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xdata)
-
+int
+afr_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int child_index = (long)cookie;
- uuid_t gfid = {0};
-
- local = frame->local;
- priv = this->private;
-
- if (op_ret < 0 && op_errno != ENOTCONN) {
- loc_gfid (&local->loc, gfid);
- gf_log (this->name, GF_LOG_ERROR, "%s: Failed to unlock %s "
- "with lk_owner: %s (%s)", uuid_utoa (gfid),
- priv->children[child_index]->name,
- lkowner_utoa (&frame->root->lk_owner),
- strerror (op_errno));
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int call_count = 0;
+ int32_t op_errno = ENOMEM;
+
+ priv = this->private;
+
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
+
+ local->op = GF_FOP_STATFS;
+ if (!afr_is_consistent_io_possible(local, priv, &op_errno))
+ goto out;
+
+ if (priv->arbiter_count == 1 && local->child_up[ARBITER_BRICK_INDEX])
+ local->call_count--;
+ call_count = local->call_count;
+ if (!call_count) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ if (AFR_IS_ARBITER_BRICK(priv, i))
+ continue;
+ STACK_WIND(frame, afr_statfs_cbk, priv->children[i],
+ priv->children[i]->fops->statfs, loc, xdata);
+ if (!--call_count)
+ break;
}
+ }
- call_count = afr_frame_return (frame);
- if (call_count == 0) {
- AFR_STACK_UNWIND (inodelk, frame, local->op_ret,
- local->op_errno, local->xdata_rsp);
- }
+ return 0;
+out:
+ AFR_STACK_UNWIND(statfs, frame, -1, op_errno, NULL, NULL);
- return 0;
+ return 0;
}
int32_t
-afr_unlock_inodelks_and_unwind (call_frame_t *frame, xlator_t *this,
- int call_count)
+afr_lk_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata)
{
- int i = 0;
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
-
- local = frame->local;
- priv = this->private;
- local->call_count = call_count;
- local->cont.inodelk.flock.l_type = F_UNLCK;
-
- for (i = 0; i < priv->child_count; i++) {
- if (!local->replies[i].valid)
- continue;
-
- if (local->replies[i].op_ret == -1)
- continue;
-
- STACK_WIND_COOKIE (frame, afr_unlock_partial_inodelk_cbk,
- (void*) (long) i,
- priv->children[i],
- priv->children[i]->fops->inodelk,
- local->cont.inodelk.volume,
- &local->loc, local->cont.inodelk.cmd,
- &local->cont.inodelk.flock, 0);
-
- if (!--call_count)
- break;
- }
-
- return 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = this->private;
+ int call_count = -1;
+ int child_index = (long)cookie;
+
+ local = frame->local;
+
+ if (op_ret < 0 && op_errno != ENOTCONN && op_errno != EBADFD) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, AFR_MSG_UNLOCK_FAIL,
+ "gfid=%s: unlock failed on subvolume %s "
+ "with lock owner %s",
+ uuid_utoa(local->fd->inode->gfid),
+ priv->children[child_index]->name,
+ lkowner_utoa(&frame->root->lk_owner));
+ }
+
+ call_count = afr_frame_return(frame);
+ if (call_count == 0) {
+ AFR_STACK_UNWIND(lk, frame, local->op_ret, local->op_errno, NULL,
+ local->xdata_rsp);
+ }
+
+ return 0;
}
int32_t
-afr_inodelk_done (call_frame_t *frame, xlator_t *this)
+afr_lk_unlock(call_frame_t *frame, xlator_t *this)
{
- int i = 0;
- int lock_count = 0;
-
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int call_count = 0;
- local = frame->local;
- priv = this->private;
+ local = frame->local;
+ priv = this->private;
- for (i = 0; i < priv->child_count; i++) {
- if (!local->replies[i].valid)
- continue;
+ call_count = afr_locked_nodes_count(local->cont.lk.locked_nodes,
+ priv->child_count);
- if (local->replies[i].op_ret == 0)
- lock_count++;
+ if (call_count == 0) {
+ AFR_STACK_UNWIND(lk, frame, local->op_ret, local->op_errno, NULL,
+ local->xdata_rsp);
+ return 0;
+ }
- if (local->op_ret == -1 && local->op_errno == EAGAIN)
- continue;
+ local->call_count = call_count;
- if ((local->replies[i].op_ret == -1) &&
- (local->replies[i].op_errno == EAGAIN)) {
- local->op_ret = -1;
- local->op_errno = EAGAIN;
- continue;
- }
+ local->cont.lk.user_flock.l_type = F_UNLCK;
- if (local->replies[i].op_ret == 0)
- local->op_ret = 0;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->cont.lk.locked_nodes[i]) {
+ STACK_WIND_COOKIE(frame, afr_lk_unlock_cbk, (void *)(long)i,
+ priv->children[i], priv->children[i]->fops->lk,
+ local->fd, F_SETLK, &local->cont.lk.user_flock,
+ NULL);
- local->op_errno = local->replies[i].op_errno;
- }
-
- if (lock_count && local->cont.inodelk.flock.l_type != F_UNLCK &&
- (local->op_ret == -1 && local->op_errno == EAGAIN)) {
- afr_unlock_inodelks_and_unwind (frame, this,
- lock_count);
- } else {
- AFR_STACK_UNWIND (inodelk, frame, local->op_ret,
- local->op_errno, local->xdata_rsp);
+ if (!--call_count)
+ break;
}
+ }
- return 0;
+ return 0;
}
-int
-afr_common_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+int32_t
+afr_lk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
{
- afr_local_t *local = NULL;
- int child_index = (long)cookie;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int child_index = -1;
- local = frame->local;
+ local = frame->local;
+ priv = this->private;
- local->replies[child_index].valid = 1;
- local->replies[child_index].op_ret = op_ret;
- local->replies[child_index].op_errno = op_errno;
- if (op_ret == 0 && xdata) {
- local->replies[child_index].xdata = dict_ref (xdata);
- LOCK (&frame->lock);
- {
- if (!local->xdata_rsp)
- local->xdata_rsp = dict_ref (xdata);
- }
- UNLOCK (&frame->lock);
- }
- return 0;
-}
+ child_index = (long)cookie;
-static int32_t
-afr_parallel_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+ afr_common_lock_cbk(frame, cookie, this, op_ret, op_errno, xdata);
+ if (op_ret < 0 && op_errno == EAGAIN) {
+ local->op_ret = -1;
+ local->op_errno = EAGAIN;
-{
- int call_count = 0;
+ afr_lk_unlock(frame, this);
+ return 0;
+ }
+
+ if (op_ret == 0) {
+ local->op_ret = 0;
+ local->op_errno = 0;
+ local->cont.lk.locked_nodes[child_index] = 1;
+ local->cont.lk.ret_flock = *lock;
+ }
+
+ child_index++;
+
+ if (child_index < priv->child_count) {
+ STACK_WIND_COOKIE(frame, afr_lk_cbk, (void *)(long)child_index,
+ priv->children[child_index],
+ priv->children[child_index]->fops->lk, local->fd,
+ local->cont.lk.cmd, &local->cont.lk.user_flock,
+ local->xdata_req);
+ } else if (priv->quorum_count &&
+ !afr_has_quorum(local->cont.lk.locked_nodes, this, NULL)) {
+ local->op_ret = -1;
+ local->op_errno = afr_final_errno(local, priv);
- afr_common_inodelk_cbk (frame, cookie, this, op_ret, op_errno, xdata);
+ afr_lk_unlock(frame, this);
+ } else {
+ if (local->op_ret < 0)
+ local->op_errno = afr_final_errno(local, priv);
- call_count = afr_frame_return (frame);
- if (call_count == 0)
- afr_inodelk_done (frame, this);
+ AFR_STACK_UNWIND(lk, frame, local->op_ret, local->op_errno,
+ &local->cont.lk.ret_flock, local->xdata_rsp);
+ }
- return 0;
+ return 0;
}
-static inline gf_boolean_t
-afr_is_conflicting_lock_present (int32_t op_ret, int32_t op_errno)
+int
+afr_lk_transaction_cbk(int ret, call_frame_t *frame, void *opaque)
{
- if (op_ret == -1 && op_errno == EAGAIN)
- return _gf_true;
- return _gf_false;
+ return 0;
}
-static int32_t
-afr_serialized_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-
+int
+afr_lk_txn_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int child_index = (long)cookie;
- int next_child = 0;
-
- local = frame->local;
- priv = this->private;
-
- afr_common_inodelk_cbk (frame, cookie, this, op_ret, op_errno, xdata);
+ afr_local_t *local = NULL;
+ int child_index = -1;
+
+ local = frame->local;
+ child_index = (long)cookie;
+ afr_common_lock_cbk(frame, cookie, this, op_ret, op_errno, xdata);
+ if (op_ret == 0) {
+ local->op_ret = 0;
+ local->op_errno = 0;
+ local->cont.lk.locked_nodes[child_index] = 1;
+ local->cont.lk.ret_flock = *lock;
+ }
+ syncbarrier_wake(&local->barrier);
+ return 0;
+}
- for (next_child = child_index + 1; next_child < priv->child_count;
- next_child++) {
- if (local->child_up[next_child])
- break;
+int
+afr_lk_txn_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata)
+{
+ afr_local_t *local = frame->local;
+ afr_private_t *priv = this->private;
+ int child_index = (long)cookie;
+
+ if (op_ret < 0 && op_errno != ENOTCONN && op_errno != EBADFD) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, AFR_MSG_UNLOCK_FAIL,
+ "gfid=%s: unlock failed on subvolume %s "
+ "with lock owner %s",
+ uuid_utoa(local->fd->inode->gfid),
+ priv->children[child_index]->name,
+ lkowner_utoa(&frame->root->lk_owner));
+ }
+ return 0;
+}
+int
+afr_lk_transaction(void *opaque)
+{
+ call_frame_t *frame = NULL;
+ xlator_t *this = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ char *wind_on = NULL;
+ int op_errno = 0;
+ int i = 0;
+ int ret = 0;
+
+ frame = (call_frame_t *)opaque;
+ local = frame->local;
+ this = frame->this;
+ priv = this->private;
+ wind_on = alloca0(priv->child_count);
+
+ if (priv->arbiter_count || priv->child_count != 3) {
+ op_errno = ENOTSUP;
+ gf_msg(frame->this->name, GF_LOG_ERROR, op_errno, AFR_MSG_LK_HEAL_DOM,
+ "%s: Lock healing supported only for replica 3 volumes.",
+ uuid_utoa(local->fd->inode->gfid));
+ goto err;
+ }
+
+ op_errno = -afr_dom_lock_acquire(frame); // Released during
+ // AFR_STACK_UNWIND
+ if (op_errno != 0) {
+ goto err;
+ }
+ if (priv->quorum_count &&
+ !afr_has_quorum(local->cont.lk.dom_locked_nodes, this, NULL)) {
+ op_errno = afr_final_errno(local, priv);
+ goto err;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (priv->child_up[i] && local->cont.lk.dom_locked_nodes[i])
+ wind_on[i] = 1;
+ }
+ AFR_ONLIST(wind_on, frame, afr_lk_txn_wind_cbk, lk, local->fd,
+ local->cont.lk.cmd, &local->cont.lk.user_flock,
+ local->xdata_req);
+
+ if (priv->quorum_count &&
+ !afr_has_quorum(local->cont.lk.locked_nodes, this, NULL)) {
+ local->op_ret = -1;
+ local->op_errno = afr_final_errno(local, priv);
+ goto unlock;
+ } else {
+ if (local->cont.lk.user_flock.l_type == F_UNLCK)
+ ret = afr_remove_lock_from_saved_locks(local, this);
+ else
+ ret = afr_add_lock_to_saved_locks(frame, this);
+ if (ret) {
+ local->op_ret = -1;
+ local->op_errno = -ret;
+ goto unlock;
}
+ AFR_STACK_UNWIND(lk, frame, local->op_ret, local->op_errno,
+ &local->cont.lk.ret_flock, local->xdata_rsp);
+ }
- if (afr_is_conflicting_lock_present (op_ret, op_errno) ||
- (next_child == priv->child_count)) {
- afr_inodelk_done (frame, this);
- } else {
- STACK_WIND_COOKIE (frame, afr_serialized_inodelk_cbk,
- (void *) (long) next_child,
- priv->children[next_child],
- priv->children[next_child]->fops->inodelk,
- (const char *)local->cont.inodelk.volume,
- &local->loc, local->cont.inodelk.cmd,
- &local->cont.inodelk.flock,
- local->xdata_req);
- }
+ return 0;
- return 0;
+unlock:
+ local->cont.lk.user_flock.l_type = F_UNLCK;
+ AFR_ONLIST(local->cont.lk.locked_nodes, frame, afr_lk_txn_unlock_cbk, lk,
+ local->fd, F_SETLK, &local->cont.lk.user_flock, NULL);
+err:
+ AFR_STACK_UNWIND(lk, frame, -1, op_errno, NULL, NULL);
+ return -1;
}
-static int
-afr_parallel_inodelk_wind (call_frame_t *frame, xlator_t *this)
+int
+afr_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int call_count = 0;
- int i = 0;
-
- priv = this->private;
- local = frame->local;
- call_count = local->call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (!local->child_up[i])
- continue;
- STACK_WIND_COOKIE (frame, afr_parallel_inodelk_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->inodelk,
- (const char *)local->cont.inodelk.volume,
- &local->loc, local->cont.inodelk.cmd,
- &local->cont.inodelk.flock,
- local->xdata_req);
- if (!--call_count)
- break;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int ret = 0;
+ int i = 0;
+ int32_t op_errno = ENOMEM;
+
+ priv = this->private;
+
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
+
+ local->op = GF_FOP_LK;
+ if (!afr_lk_is_unlock(cmd, flock)) {
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ if (!afr_is_consistent_io_possible(local, priv, &op_errno))
+ goto out;
+ }
+
+ local->cont.lk.locked_nodes = GF_CALLOC(
+ priv->child_count, sizeof(*local->cont.lk.locked_nodes),
+ gf_afr_mt_char);
+
+ if (!local->cont.lk.locked_nodes) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ local->fd = fd_ref(fd);
+ local->cont.lk.cmd = cmd;
+ local->cont.lk.user_flock = *flock;
+ local->cont.lk.ret_flock = *flock;
+ if (xdata)
+ local->xdata_req = dict_ref(xdata);
+
+ if (afr_is_lock_mode_mandatory(xdata)) {
+ ret = synctask_new(this->ctx->env, afr_lk_transaction,
+ afr_lk_transaction_cbk, frame, frame);
+ if (ret) {
+ op_errno = ENOMEM;
+ goto out;
}
return 0;
-}
+ }
-static int
-afr_serialized_inodelk_wind (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int i = 0;
+ STACK_WIND_COOKIE(frame, afr_lk_cbk, (void *)(long)0, priv->children[i],
+ priv->children[i]->fops->lk, fd, cmd, flock,
+ local->xdata_req);
- priv = this->private;
- local = frame->local;
+ return 0;
+out:
+ AFR_STACK_UNWIND(lk, frame, -1, op_errno, NULL, NULL);
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_serialized_inodelk_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->inodelk,
- (const char *)local->cont.inodelk.volume,
- &local->loc, local->cont.inodelk.cmd,
- &local->cont.inodelk.flock,
- local->xdata_req);
- break;
- }
- }
- return 0;
+ return 0;
}
int32_t
-afr_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd,
- struct gf_flock *flock, dict_t *xdata)
+afr_lease_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_lease *lease,
+ dict_t *xdata)
{
- afr_local_t *local = NULL;
- int32_t op_errno = ENOMEM;
+ afr_local_t *local = NULL;
+ int call_count = -1;
- local = AFR_FRAME_INIT (frame, op_errno);
- if (!local)
- goto out;
+ local = frame->local;
+ call_count = afr_frame_return(frame);
- loc_copy (&local->loc, loc);
- local->cont.inodelk.volume = gf_strdup (volume);
- if (!local->cont.inodelk.volume) {
- op_errno = ENOMEM;
- goto out;
- }
+ if (call_count == 0)
+ AFR_STACK_UNWIND(lease, frame, local->op_ret, local->op_errno, lease,
+ xdata);
- local->cont.inodelk.cmd = cmd;
- local->cont.inodelk.flock = *flock;
- if (xdata)
- local->xdata_req = dict_ref (xdata);
-
- /* At least one child is up */
- /*
- * Non-blocking locks also need to be serialized. Otherwise there is
- * a chance that both the mounts which issued same non-blocking inodelk
- * may endup not acquiring the lock on any-brick.
- * Ex: Mount1 and Mount2
- * request for full length lock on file f1. Mount1 afr may acquire the
- * partial lock on brick-1 and may not acquire the lock on brick-2
- * because Mount2 already got the lock on brick-2, vice versa. Since
- * both the mounts only got partial locks, afr treats them as failure in
- * gaining the locks and unwinds with EAGAIN errno.
- */
- if (flock->l_type == F_UNLCK) {
- afr_parallel_inodelk_wind (frame, this);
- } else {
- afr_serialized_inodelk_wind (frame, this);
- }
-
- return 0;
-out:
- AFR_STACK_UNWIND (inodelk, frame, -1, op_errno, NULL);
-
- return 0;
+ return 0;
}
-
int32_t
-afr_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-
+afr_lease_unlock(call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- int call_count = -1;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (op_ret == 0)
- local->op_ret = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int call_count = 0;
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
+ local = frame->local;
+ priv = this->private;
- if (call_count == 0)
- AFR_STACK_UNWIND (finodelk, frame, local->op_ret,
- local->op_errno, xdata);
+ call_count = afr_locked_nodes_count(local->cont.lease.locked_nodes,
+ priv->child_count);
+ if (call_count == 0) {
+ AFR_STACK_UNWIND(lease, frame, local->op_ret, local->op_errno,
+ &local->cont.lease.ret_lease, NULL);
return 0;
-}
-
-
-int32_t
-afr_finodelk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
- int32_t cmd, struct gf_flock *flock, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int i = 0;
- int32_t call_count = 0;
- int32_t op_errno = ENOMEM;
+ }
- priv = this->private;
+ local->call_count = call_count;
- local = AFR_FRAME_INIT (frame, op_errno);
- if (!local)
- goto out;
+ local->cont.lease.user_lease.cmd = GF_UNLK_LEASE;
- call_count = local->call_count;
- if (!call_count) {
- op_errno = ENOTCONN;
- goto out;
- }
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->cont.lease.locked_nodes[i]) {
+ STACK_WIND(frame, afr_lease_unlock_cbk, priv->children[i],
+ priv->children[i]->fops->lease, &local->loc,
+ &local->cont.lease.user_lease, NULL);
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_finodelk_cbk,
- priv->children[i],
- priv->children[i]->fops->finodelk,
- volume, fd, cmd, flock, xdata);
-
- if (!--call_count)
- break;
- }
+ if (!--call_count)
+ break;
}
+ }
- return 0;
-out:
- AFR_STACK_UNWIND (finodelk, frame, -1, op_errno, NULL);
-
- return 0;
+ return 0;
}
-
int32_t
-afr_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+afr_lease_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct gf_lease *lease, dict_t *xdata)
{
- afr_local_t *local = NULL;
- int call_count = -1;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int child_index = -1;
- local = frame->local;
+ local = frame->local;
+ priv = this->private;
- LOCK (&frame->lock);
- {
- if (op_ret == 0)
- local->op_ret = 0;
+ child_index = (long)cookie;
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ afr_common_lock_cbk(frame, cookie, this, op_ret, op_errno, xdata);
+ if (op_ret < 0 && op_errno == EAGAIN) {
+ local->op_ret = -1;
+ local->op_errno = EAGAIN;
- call_count = afr_frame_return (frame);
+ afr_lease_unlock(frame, this);
+ return 0;
+ }
+
+ if (op_ret == 0) {
+ local->op_ret = 0;
+ local->op_errno = 0;
+ local->cont.lease.locked_nodes[child_index] = 1;
+ local->cont.lease.ret_lease = *lease;
+ }
+
+ child_index++;
+ if (child_index < priv->child_count) {
+ STACK_WIND_COOKIE(frame, afr_lease_cbk, (void *)(long)child_index,
+ priv->children[child_index],
+ priv->children[child_index]->fops->lease, &local->loc,
+ &local->cont.lease.user_lease, xdata);
+ } else if (priv->quorum_count &&
+ !afr_has_quorum(local->cont.lease.locked_nodes, this, NULL)) {
+ local->op_ret = -1;
+ local->op_errno = afr_final_errno(local, priv);
- if (call_count == 0)
- AFR_STACK_UNWIND (entrylk, frame, local->op_ret,
- local->op_errno, xdata);
+ afr_lease_unlock(frame, this);
+ } else {
+ if (local->op_ret < 0)
+ local->op_errno = afr_final_errno(local, priv);
+ AFR_STACK_UNWIND(lease, frame, local->op_ret, local->op_errno,
+ &local->cont.lease.ret_lease, NULL);
+ }
- return 0;
+ return 0;
}
-
int
-afr_entrylk (call_frame_t *frame, xlator_t *this, const char *volume,
- loc_t *loc, const char *basename, entrylk_cmd cmd,
- entrylk_type type, dict_t *xdata)
+afr_lease(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct gf_lease *lease, dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int i = 0;
- int32_t call_count = 0;
- int32_t op_errno = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int32_t op_errno = ENOMEM;
- priv = this->private;
+ priv = this->private;
- local = AFR_FRAME_INIT (frame, op_errno);
- if (!local)
- goto out;
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
- call_count = local->call_count;
- if (!call_count) {
- op_errno = ENOTCONN;
- goto out;
- }
+ local->op = GF_FOP_LEASE;
+ local->cont.lease.locked_nodes = GF_CALLOC(
+ priv->child_count, sizeof(*local->cont.lease.locked_nodes),
+ gf_afr_mt_char);
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_entrylk_cbk,
- priv->children[i],
- priv->children[i]->fops->entrylk,
- volume, loc, basename, cmd, type, xdata);
-
- if (!--call_count)
- break;
- }
- }
+ if (!local->cont.lease.locked_nodes) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- return 0;
-out:
- AFR_STACK_UNWIND (entrylk, frame, -1, op_errno, NULL);
+ loc_copy(&local->loc, loc);
+ local->cont.lease.user_lease = *lease;
+ local->cont.lease.ret_lease = *lease;
- return 0;
-}
+ STACK_WIND_COOKIE(frame, afr_lease_cbk, (void *)(long)0, priv->children[0],
+ priv->children[0]->fops->lease, loc, lease, xdata);
+ return 0;
+out:
+ AFR_STACK_UNWIND(lease, frame, -1, op_errno, NULL, NULL);
+ return 0;
+}
int
-afr_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-
+afr_ipc_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
{
- afr_local_t *local = NULL;
- int call_count = -1;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (op_ret == 0)
- local->op_ret = 0;
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
+ afr_local_t *local = NULL;
+ int child_index = (long)cookie;
+ int call_count = 0;
+ gf_boolean_t failed = _gf_false;
+ gf_boolean_t succeeded = _gf_false;
+ int i = 0;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ local->replies[child_index].valid = 1;
+ local->replies[child_index].op_ret = op_ret;
+ local->replies[child_index].op_errno = op_errno;
+ if (xdata)
+ local->replies[child_index].xdata = dict_ref(xdata);
+
+ call_count = afr_frame_return(frame);
+ if (call_count)
+ goto out;
+ /* If any of the subvolumes failed with other than ENOTCONN
+ * return error else return success unless all the subvolumes
+ * failed.
+ * TODO: In case of failure, we need to unregister the xattrs
+ * from the other subvolumes where it succeeded (once upcall
+ * fixes the Bz-1371622)*/
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
+ if (local->replies[i].op_ret < 0 &&
+ local->replies[i].op_errno != ENOTCONN) {
+ local->op_ret = local->replies[i].op_ret;
+ local->op_errno = local->replies[i].op_errno;
+ if (local->xdata_rsp)
+ dict_unref(local->xdata_rsp);
+ local->xdata_rsp = NULL;
+ if (local->replies[i].xdata) {
+ local->xdata_rsp = dict_ref(local->replies[i].xdata);
+ }
+ failed = _gf_true;
+ break;
+ }
+ if (local->replies[i].op_ret == 0) {
+ succeeded = _gf_true;
+ local->op_ret = 0;
+ local->op_errno = 0;
+ if (!local->xdata_rsp && local->replies[i].xdata) {
+ local->xdata_rsp = dict_ref(local->replies[i].xdata);
+ }
+ }
+ }
+
+ if (!succeeded && !failed) {
+ local->op_ret = -1;
+ local->op_errno = ENOTCONN;
+ }
- if (call_count == 0)
- AFR_STACK_UNWIND (fentrylk, frame, local->op_ret,
- local->op_errno, xdata);
+ AFR_STACK_UNWIND(ipc, frame, local->op_ret, local->op_errno,
+ local->xdata_rsp);
- return 0;
+out:
+ return 0;
}
-
int
-afr_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
- const char *basename, entrylk_cmd cmd, entrylk_type type,
- dict_t *xdata)
+afr_ipc(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int i = 0;
- int32_t call_count = 0;
- int32_t op_errno = ENOMEM;
+ afr_local_t *local = NULL;
+ int32_t op_errno = -1;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int call_cnt = -1;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
- priv = this->private;
+ if (op != GF_IPC_TARGET_UPCALL)
+ goto wind_default;
- local = AFR_FRAME_INIT (frame, op_errno);
- if (!local)
- goto out;
+ VALIDATE_OR_GOTO(this->private, err);
+ priv = this->private;
- call_count = local->call_count;
- if (!call_count) {
- op_errno = ENOTCONN;
- goto out;
- }
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto err;
+ call_cnt = local->call_count;
+
+ if (xdata) {
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_fentrylk_cbk,
- priv->children[i],
- priv->children[i]->fops->fentrylk,
- volume, fd, basename, cmd, type, xdata);
-
- if (!--call_count)
- break;
- }
+ if (dict_set_int8(xdata, priv->pending_key[i], 0) < 0)
+ goto err;
}
+ }
- return 0;
-out:
- AFR_STACK_UNWIND (fentrylk, frame, -1, op_errno, NULL);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->child_up[i])
+ continue;
- return 0;
+ STACK_WIND_COOKIE(frame, afr_ipc_cbk, (void *)(long)i,
+ priv->children[i], priv->children[i]->fops->ipc, op,
+ xdata);
+ if (!--call_cnt)
+ break;
+ }
+ return 0;
+
+err:
+ if (op_errno == -1)
+ op_errno = errno;
+ AFR_STACK_UNWIND(ipc, frame, -1, op_errno, NULL);
+
+ return 0;
+
+wind_default:
+ STACK_WIND(frame, default_ipc_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ipc, op, xdata);
+ return 0;
}
-
int
-afr_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, struct statvfs *statvfs, dict_t *xdata)
+afr_forget(xlator_t *this, inode_t *inode)
{
- afr_local_t *local = NULL;
- int call_count = 0;
- struct statvfs *buf = NULL;
-
- LOCK (&frame->lock);
- {
- local = frame->local;
-
- if (op_ret != 0) {
- local->op_errno = op_errno;
- goto unlock;
- }
-
- local->op_ret = op_ret;
-
- buf = &local->cont.statfs.buf;
- if (local->cont.statfs.buf_set) {
- if (statvfs->f_bavail < buf->f_bavail) {
- *buf = *statvfs;
- if (xdata) {
- if (local->xdata_rsp)
- dict_unref (local->xdata_rsp);
- local->xdata_rsp = dict_ref (xdata);
- }
- }
- } else {
- *buf = *statvfs;
- local->cont.statfs.buf_set = 1;
- if (xdata)
- local->xdata_rsp = dict_ref (xdata);
- }
- }
-unlock:
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- AFR_STACK_UNWIND (statfs, frame, local->op_ret, local->op_errno,
- &local->cont.statfs.buf, local->xdata_rsp);
+ uint64_t ctx_int = 0;
+ afr_inode_ctx_t *ctx = NULL;
+ afr_spb_choice_timeout_cancel(this, inode);
+ inode_ctx_del(inode, this, &ctx_int);
+ if (!ctx_int)
return 0;
-}
+ ctx = (afr_inode_ctx_t *)(uintptr_t)ctx_int;
+ afr_inode_ctx_destroy(ctx);
+ return 0;
+}
int
-afr_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+afr_priv_dump(xlator_t *this)
{
- afr_local_t * local = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- int call_count = 0;
- int32_t op_errno = ENOMEM;
-
- priv = this->private;
-
- local = AFR_FRAME_INIT (frame, op_errno);
- if (!local)
- goto out;
-
- call_count = local->call_count;
- if (!call_count) {
- op_errno = ENOTCONN;
- goto out;
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_statfs_cbk,
- priv->children[i],
- priv->children[i]->fops->statfs,
- loc, xdata);
- if (!--call_count)
- break;
- }
- }
-
- return 0;
-out:
- AFR_STACK_UNWIND (statfs, frame, -1, op_errno, NULL, NULL);
-
- return 0;
+ afr_private_t *priv = NULL;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN];
+ char key[GF_DUMP_MAX_BUF_LEN];
+ int i = 0;
+
+ GF_ASSERT(this);
+ priv = this->private;
+
+ GF_ASSERT(priv);
+ snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name);
+ gf_proc_dump_add_section("%s", key_prefix);
+ gf_proc_dump_write("child_count", "%u", priv->child_count);
+ for (i = 0; i < priv->child_count; i++) {
+ sprintf(key, "child_up[%d]", i);
+ gf_proc_dump_write(key, "%d", priv->child_up[i]);
+ sprintf(key, "pending_key[%d]", i);
+ gf_proc_dump_write(key, "%s", priv->pending_key[i]);
+ sprintf(key, "pending_reads[%d]", i);
+ gf_proc_dump_write(key, "%" PRId64,
+ GF_ATOMIC_GET(priv->pending_reads[i]));
+ sprintf(key, "child_latency[%d]", i);
+ gf_proc_dump_write(key, "%" PRId64, priv->child_latency[i]);
+ sprintf(key, "halo_child_up[%d]", i);
+ gf_proc_dump_write(key, "%d", priv->halo_child_up[i]);
+ }
+ gf_proc_dump_write("data_self_heal", "%d", priv->data_self_heal);
+ gf_proc_dump_write("metadata_self_heal", "%d", priv->metadata_self_heal);
+ gf_proc_dump_write("entry_self_heal", "%d", priv->entry_self_heal);
+ gf_proc_dump_write("read_child", "%d", priv->read_child);
+ gf_proc_dump_write("wait_count", "%u", priv->wait_count);
+ gf_proc_dump_write("heal-wait-queue-length", "%d", priv->heal_wait_qlen);
+ gf_proc_dump_write("heal-waiters", "%d", priv->heal_waiters);
+ gf_proc_dump_write("background-self-heal-count", "%d",
+ priv->background_self_heal_count);
+ gf_proc_dump_write("healers", "%d", priv->healers);
+ gf_proc_dump_write("read-hash-mode", "%d", priv->hash_mode);
+ gf_proc_dump_write("use-anonymous-inode", "%d", priv->use_anon_inode);
+ if (priv->quorum_count == AFR_QUORUM_AUTO) {
+ gf_proc_dump_write("quorum-type", "auto");
+ } else if (priv->quorum_count == 0) {
+ gf_proc_dump_write("quorum-type", "none");
+ } else {
+ gf_proc_dump_write("quorum-type", "fixed");
+ gf_proc_dump_write("quorum-count", "%d", priv->quorum_count);
+ }
+ gf_proc_dump_write("up", "%u", afr_has_quorum(priv->child_up, this, NULL));
+ if (priv->thin_arbiter_count) {
+ gf_proc_dump_write("ta_child_up", "%d", priv->ta_child_up);
+ gf_proc_dump_write("ta_bad_child_index", "%d",
+ priv->ta_bad_child_index);
+ gf_proc_dump_write("ta_notify_dom_lock_offset", "%" PRId64,
+ priv->ta_notify_dom_lock_offset);
+ }
+
+ return 0;
}
+/**
+ * find_child_index - find the child's index in the array of subvolumes
+ * @this: AFR
+ * @child: child
+ */
-int32_t
-afr_lk_unlock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
- dict_t *xdata)
+static int
+afr_find_child_index(xlator_t *this, xlator_t *child)
{
- afr_local_t * local = NULL;
- int call_count = -1;
-
- local = frame->local;
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno,
- lock, xdata);
-
- return 0;
+ afr_private_t *priv = NULL;
+ int child_count = -1;
+ int i = -1;
+
+ priv = this->private;
+ child_count = priv->child_count;
+ if (priv->thin_arbiter_count) {
+ child_count++;
+ }
+
+ for (i = 0; i < child_count; i++) {
+ if ((xlator_t *)child == priv->children[i])
+ break;
+ }
+
+ return i;
}
-
-int32_t
-afr_lk_unlock (call_frame_t *frame, xlator_t *this)
+int
+__afr_get_up_children_count(afr_private_t *priv)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- int i = 0;
- int call_count = 0;
-
- local = frame->local;
- priv = this->private;
+ int up_children = 0;
+ int i = 0;
- call_count = afr_locked_nodes_count (local->cont.lk.locked_nodes,
- priv->child_count);
+ for (i = 0; i < priv->child_count; i++)
+ if (priv->child_up[i] == 1)
+ up_children++;
- if (call_count == 0) {
- AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno,
- &local->cont.lk.ret_flock, NULL);
- return 0;
- }
-
- local->call_count = call_count;
+ return up_children;
+}
- local->cont.lk.user_flock.l_type = F_UNLCK;
+static int
+__get_heard_from_all_status(xlator_t *this)
+{
+ afr_private_t *priv = this->private;
+ int i;
- for (i = 0; i < priv->child_count; i++) {
- if (local->cont.lk.locked_nodes[i]) {
- STACK_WIND (frame, afr_lk_unlock_cbk,
- priv->children[i],
- priv->children[i]->fops->lk,
- local->fd, F_SETLK,
- &local->cont.lk.user_flock, NULL);
-
- if (!--call_count)
- break;
- }
+ for (i = 0; i < priv->child_count; i++) {
+ if (!priv->last_event[i]) {
+ return 0;
}
-
+ }
+ if (priv->thin_arbiter_count && !priv->ta_child_up) {
return 0;
+ }
+ return 1;
}
-
-int32_t
-afr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
+glusterfs_event_t
+__afr_transform_event_from_state(xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int child_index = -1;
-/* int ret = 0; */
+ int i = 0;
+ int up_children = 0;
+ afr_private_t *priv = this->private;
+
+ if (__get_heard_from_all_status(this))
+ /* have_heard_from_all. Let afr_notify() do the propagation. */
+ return GF_EVENT_MAXVAL;
+
+ up_children = __afr_get_up_children_count(priv);
+ /* Treat the children with pending notification, as having sent a
+ * GF_EVENT_CHILD_DOWN. i.e. set the event as GF_EVENT_SOME_DESCENDENT_DOWN,
+ * as done in afr_notify() */
+ for (i = 0; i < priv->child_count; i++) {
+ if (priv->last_event[i])
+ continue;
+ priv->last_event[i] = GF_EVENT_SOME_DESCENDENT_DOWN;
+ priv->child_up[i] = 0;
+ }
+
+ if (up_children)
+ /* We received at least one child up */
+ return GF_EVENT_CHILD_UP;
+ else
+ return GF_EVENT_CHILD_DOWN;
+
+ return GF_EVENT_MAXVAL;
+}
+static void
+afr_notify_cbk(void *data)
+{
+ xlator_t *this = data;
+ afr_private_t *priv = this->private;
+ glusterfs_event_t event = GF_EVENT_MAXVAL;
+ gf_boolean_t propagate = _gf_false;
+
+ LOCK(&priv->lock);
+ {
+ if (!priv->timer) {
+ /*
+ * Either child_up/child_down is already sent to parent.
+ * This is a spurious wake up.
+ */
+ goto unlock;
+ }
+ priv->timer = NULL;
+ event = __afr_transform_event_from_state(this);
+ if (event != GF_EVENT_MAXVAL)
+ propagate = _gf_true;
+ }
+unlock:
+ UNLOCK(&priv->lock);
+ if (propagate)
+ default_notify(this, event, NULL);
+}
- local = frame->local;
- priv = this->private;
+static void
+__afr_launch_notify_timer(xlator_t *this, afr_private_t *priv)
+{
+ struct timespec delay = {
+ 0,
+ };
+
+ gf_msg_debug(this->name, 0, "Initiating child-down timer");
+ delay.tv_sec = 10;
+ delay.tv_nsec = 0;
+ priv->timer = gf_timer_call_after(this->ctx, delay, afr_notify_cbk, this);
+ if (priv->timer == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_TIMER_CREATE_FAIL,
+ "Cannot create timer for delayed initialization");
+ }
+}
- child_index = (long) cookie;
+static int
+find_best_down_child(xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ int i = -1;
+ int32_t best_child = -1;
+ int64_t best_latency = INT64_MAX;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!priv->child_up[i] && priv->child_latency[i] >= 0 &&
+ priv->child_latency[i] < best_latency) {
+ best_child = i;
+ best_latency = priv->child_latency[i];
+ }
+ }
+ if (best_child >= 0) {
+ gf_msg_debug(this->name, 0,
+ "Found best down child (%d) @ %" PRId64 " ms latency",
+ best_child, best_latency);
+ }
+ return best_child;
+}
- if (!child_went_down (op_ret, op_errno) && (op_ret == -1)) {
- local->op_ret = -1;
- local->op_errno = op_errno;
+int
+find_worst_up_child(xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ int i = -1;
+ int32_t worst_child = -1;
+ int64_t worst_latency = INT64_MIN;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (priv->child_up[i] && priv->child_latency[i] >= 0 &&
+ priv->child_latency[i] > worst_latency) {
+ worst_child = i;
+ worst_latency = priv->child_latency[i];
+ }
+ }
+ if (worst_child >= 0) {
+ gf_msg_debug(this->name, 0,
+ "Found worst up child (%d) @ %" PRId64 " ms latency",
+ worst_child, worst_latency);
+ }
+ return worst_child;
+}
- afr_lk_unlock (frame, this);
- return 0;
- }
+void
+__afr_handle_ping_event(xlator_t *this, xlator_t *child_xlator, const int idx,
+ int64_t halo_max_latency_msec, int32_t *event,
+ int64_t child_latency_msec)
+{
+ afr_private_t *priv = NULL;
+ int up_children = 0;
- if (op_ret == 0) {
- local->op_ret = 0;
- local->op_errno = 0;
- local->cont.lk.locked_nodes[child_index] = 1;
- local->cont.lk.ret_flock = *lock;
- }
+ priv = this->private;
- child_index++;
+ priv->child_latency[idx] = child_latency_msec;
+ gf_msg_debug(child_xlator->name, 0, "Client ping @ %" PRId64 " ms",
+ child_latency_msec);
+ if (priv->shd.iamshd)
+ return;
- if (child_index < priv->child_count) {
- STACK_WIND_COOKIE (frame, afr_lk_cbk, (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->lk,
- local->fd, local->cont.lk.cmd,
- &local->cont.lk.user_flock, xdata);
- } else if (local->op_ret == -1) {
- /* all nodes have gone down */
+ up_children = __afr_get_up_children_count(priv);
- AFR_STACK_UNWIND (lk, frame, -1, ENOTCONN,
- &local->cont.lk.ret_flock, NULL);
+ if (child_latency_msec > halo_max_latency_msec &&
+ priv->child_up[idx] == 1 && up_children > priv->halo_min_replicas) {
+ if ((up_children - 1) < priv->halo_min_replicas) {
+ gf_log(child_xlator->name, GF_LOG_INFO,
+ "Overriding halo threshold, "
+ "min replicas: %d",
+ priv->halo_min_replicas);
} else {
- AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno,
- &local->cont.lk.ret_flock, NULL);
+ gf_log(child_xlator->name, GF_LOG_INFO,
+ "Child latency (%" PRId64
+ " ms) "
+ "exceeds halo threshold (%" PRId64
+ "), "
+ "marking child down.",
+ child_latency_msec, halo_max_latency_msec);
+ if (priv->halo_child_up[idx]) {
+ *event = GF_EVENT_CHILD_DOWN;
+ }
+ }
+ } else if (child_latency_msec < halo_max_latency_msec &&
+ priv->child_up[idx] == 0) {
+ if (up_children < priv->halo_max_replicas) {
+ gf_log(child_xlator->name, GF_LOG_INFO,
+ "Child latency (%" PRId64
+ " ms) "
+ "below halo threshold (%" PRId64
+ "), "
+ "marking child up.",
+ child_latency_msec, halo_max_latency_msec);
+ if (priv->halo_child_up[idx]) {
+ *event = GF_EVENT_CHILD_UP;
+ }
+ } else {
+ gf_log(child_xlator->name, GF_LOG_INFO,
+ "Not marking child %d up, "
+ "max replicas (%d) reached.",
+ idx, priv->halo_max_replicas);
}
-
- return 0;
+ }
}
-
-int
-afr_lk (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
+static int64_t
+afr_get_halo_latency(xlator_t *this)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int i = 0;
- int32_t op_errno = ENOMEM;
-
- priv = this->private;
-
- local = AFR_FRAME_INIT (frame, op_errno);
- if (!local)
- goto out;
-
- local->cont.lk.locked_nodes = GF_CALLOC (priv->child_count,
- sizeof (*local->cont.lk.locked_nodes),
- gf_afr_mt_char);
-
- if (!local->cont.lk.locked_nodes) {
- op_errno = ENOMEM;
- goto out;
- }
-
- local->fd = fd_ref (fd);
- local->cont.lk.cmd = cmd;
- local->cont.lk.user_flock = *flock;
- local->cont.lk.ret_flock = *flock;
-
- STACK_WIND_COOKIE (frame, afr_lk_cbk, (void *) (long) 0,
- priv->children[i],
- priv->children[i]->fops->lk,
- fd, cmd, flock, xdata);
+ afr_private_t *priv = NULL;
+ int64_t halo_max_latency_msec = 0;
+
+ priv = this->private;
+
+ if (priv->shd.iamshd) {
+ halo_max_latency_msec = priv->shd.halo_max_latency_msec;
+ } else if (priv->nfsd.iamnfsd) {
+ halo_max_latency_msec = priv->nfsd.halo_max_latency_msec;
+ } else {
+ halo_max_latency_msec = priv->halo_max_latency_msec;
+ }
+ gf_msg_debug(this->name, 0, "Using halo latency %" PRId64,
+ halo_max_latency_msec);
+ return halo_max_latency_msec;
+}
- return 0;
+void
+__afr_handle_child_up_event(xlator_t *this, xlator_t *child_xlator,
+ const int idx, int64_t child_latency_msec,
+ int32_t *event, int32_t *call_psh,
+ int32_t *up_child)
+{
+ afr_private_t *priv = NULL;
+ int up_children = 0;
+ int worst_up_child = -1;
+ int64_t halo_max_latency_msec = afr_get_halo_latency(this);
+
+ priv = this->private;
+
+ /*
+ * This only really counts if the child was never up
+ * (value = -1) or had been down (value = 0). See
+ * comment at GF_EVENT_CHILD_DOWN for a more detailed
+ * explanation.
+ */
+ if (priv->child_up[idx] != 1) {
+ priv->event_generation++;
+ }
+ priv->child_up[idx] = 1;
+
+ *call_psh = 1;
+ *up_child = idx;
+ up_children = __afr_get_up_children_count(priv);
+ /*
+ * If this is an _actual_ CHILD_UP event, we
+ * want to set the child_latency to MAX to indicate
+ * the child needs ping data to be available before doing child-up
+ */
+ if (!priv->halo_enabled)
+ goto out;
+
+ if (child_latency_msec < 0) {
+ /*set to INT64_MAX-1 so that it is found for best_down_child*/
+ priv->halo_child_up[idx] = 1;
+ if (priv->child_latency[idx] < 0) {
+ priv->child_latency[idx] = AFR_HALO_MAX_LATENCY;
+ }
+ }
+
+ /*
+ * Handle the edge case where we exceed
+ * halo_min_replicas and we've got a child which is
+ * marked up as it was helping to satisfy the
+ * halo_min_replicas even though it's latency exceeds
+ * halo_max_latency_msec.
+ */
+ if (up_children > priv->halo_min_replicas) {
+ worst_up_child = find_worst_up_child(this);
+ if (worst_up_child >= 0 &&
+ priv->child_latency[worst_up_child] > halo_max_latency_msec) {
+ gf_msg_debug(this->name, 0,
+ "Marking child %d down, "
+ "doesn't meet halo threshold (%" PRId64
+ "), and > "
+ "halo_min_replicas (%d)",
+ worst_up_child, halo_max_latency_msec,
+ priv->halo_min_replicas);
+ priv->child_up[worst_up_child] = 0;
+ up_children--;
+ }
+ }
+
+ if (up_children > priv->halo_max_replicas && !priv->shd.iamshd) {
+ worst_up_child = find_worst_up_child(this);
+ if (worst_up_child < 0) {
+ worst_up_child = idx;
+ }
+ priv->child_up[worst_up_child] = 0;
+ up_children--;
+ gf_msg_debug(this->name, 0,
+ "Marking child %d down, "
+ "up_children (%d) > halo_max_replicas (%d)",
+ worst_up_child, up_children, priv->halo_max_replicas);
+ }
out:
- AFR_STACK_UNWIND (lk, frame, -1, op_errno, NULL, NULL);
-
- return 0;
+ if (up_children == 1) {
+ gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_SUBVOL_UP,
+ "Subvolume '%s' came back up; "
+ "going online.",
+ child_xlator->name);
+ gf_event(EVENT_AFR_SUBVOL_UP, "client-pid=%d; subvol=%s",
+ this->ctx->cmd_args.client_pid, this->name);
+ } else {
+ *event = GF_EVENT_SOME_DESCENDENT_UP;
+ }
+
+ priv->last_event[idx] = *event;
}
-int
-afr_forget (xlator_t *this, inode_t *inode)
+void
+__afr_handle_child_down_event(xlator_t *this, xlator_t *child_xlator, int idx,
+ int64_t child_latency_msec, int32_t *event,
+ int32_t *call_psh, int32_t *up_child)
{
- uint64_t ctx_int = 0;
- afr_inode_ctx_t *ctx = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int up_children = 0;
+ int down_children = 0;
+ int best_down_child = -1;
+
+ priv = this->private;
+
+ /*
+ * If a brick is down when we start, we'll get a
+ * CHILD_DOWN to indicate its initial state. There
+ * was never a CHILD_UP in this case, so if we
+ * increment "down_count" the difference between than
+ * and "up_count" will no longer be the number of
+ * children that are currently up. This has serious
+ * implications e.g. for quorum enforcement, so we
+ * don't increment these values unless the event
+ * represents an actual state transition between "up"
+ * (value = 1) and anything else.
+ */
+ if (priv->child_up[idx] == 1) {
+ priv->event_generation++;
+ }
+
+ /*
+ * If this is an _actual_ CHILD_DOWN event, we
+ * want to set the child_latency to < 0 to indicate
+ * the child is really disconnected.
+ */
+ if (child_latency_msec < 0) {
+ priv->child_latency[idx] = child_latency_msec;
+ priv->halo_child_up[idx] = 0;
+ }
+ priv->child_up[idx] = 0;
+
+ up_children = __afr_get_up_children_count(priv);
+ /*
+ * Handle the edge case where we need to find the
+ * next best child (to mark up) as marking this child
+ * down would cause us to fall below halo_min_replicas.
+ * We will also force the SHD to heal this child _now_
+ * as we want it to be up to date if we are going to
+ * begin using it synchronously.
+ */
+ if (priv->halo_enabled && up_children < priv->halo_min_replicas) {
+ best_down_child = find_best_down_child(this);
+ if (best_down_child >= 0) {
+ gf_msg_debug(this->name, 0,
+ "Swapping out child %d for "
+ "child %d to satisfy halo_min_replicas (%d).",
+ idx, best_down_child, priv->halo_min_replicas);
+ priv->child_up[best_down_child] = 1;
+ *call_psh = 1;
+ *up_child = best_down_child;
+ }
+ }
+ for (i = 0; i < priv->child_count; i++)
+ if (priv->child_up[i] == 0)
+ down_children++;
+ if (down_children == priv->child_count) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SUBVOLS_DOWN,
+ "All subvolumes are down. Going "
+ "offline until at least one of them "
+ "comes back up.");
+ gf_event(EVENT_AFR_SUBVOLS_DOWN, "client-pid=%d; subvol=%s",
+ this->ctx->cmd_args.client_pid, this->name);
+ } else {
+ *event = GF_EVENT_SOME_DESCENDENT_DOWN;
+ }
+ priv->last_event[idx] = *event;
+}
- inode_ctx_del (inode, this, &ctx_int);
- if (!ctx_int)
- return 0;
+void
+afr_ta_lock_release_synctask(xlator_t *this)
+{
+ call_frame_t *ta_frame = NULL;
+ int ret = 0;
- ctx = (afr_inode_ctx_t *)ctx_int;
- GF_FREE (ctx);
- return 0;
+ ta_frame = afr_ta_frame_create(this);
+ if (!ta_frame) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_THIN_ARB,
+ "Failed to create ta_frame");
+ return;
+ }
+
+ ret = synctask_new(this->ctx->env, afr_release_notify_lock_for_ta,
+ afr_ta_lock_release_done, ta_frame, this);
+ if (ret) {
+ STACK_DESTROY(ta_frame->root);
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_THIN_ARB,
+ "Failed to release "
+ "AFR_TA_DOM_NOTIFY lock.");
+ }
}
-int
-afr_priv_dump (xlator_t *this)
+static void
+afr_handle_inodelk_contention(xlator_t *this, struct gf_upcall *upcall)
{
- afr_private_t *priv = NULL;
- char key_prefix[GF_DUMP_MAX_BUF_LEN];
- char key[GF_DUMP_MAX_BUF_LEN];
- int i = 0;
+ struct gf_upcall_inodelk_contention *lc = NULL;
+ unsigned int inmem_count = 0;
+ unsigned int onwire_count = 0;
+ afr_private_t *priv = this->private;
+ lc = upcall->data;
- GF_ASSERT (this);
- priv = this->private;
+ if (strcmp(lc->domain, AFR_TA_DOM_NOTIFY) != 0)
+ return;
- GF_ASSERT (priv);
- snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name);
- gf_proc_dump_add_section(key_prefix);
- gf_proc_dump_write("child_count", "%u", priv->child_count);
- for (i = 0; i < priv->child_count; i++) {
- sprintf (key, "child_up[%d]", i);
- gf_proc_dump_write(key, "%d", priv->child_up[i]);
- sprintf (key, "pending_key[%d]", i);
- gf_proc_dump_write(key, "%s", priv->pending_key[i]);
- }
- gf_proc_dump_write("data_self_heal", "%s", priv->data_self_heal);
- gf_proc_dump_write("metadata_self_heal", "%d", priv->metadata_self_heal);
- gf_proc_dump_write("entry_self_heal", "%d", priv->entry_self_heal);
- gf_proc_dump_write("data_change_log", "%d", priv->data_change_log);
- gf_proc_dump_write("metadata_change_log", "%d", priv->metadata_change_log);
- gf_proc_dump_write("entry-change_log", "%d", priv->entry_change_log);
- gf_proc_dump_write("read_child", "%d", priv->read_child);
- gf_proc_dump_write("favorite_child", "%d", priv->favorite_child);
- gf_proc_dump_write("wait_count", "%u", priv->wait_count);
- gf_proc_dump_write("quorum-reads", "%d", priv->quorum_reads);
+ if (priv->shd.iamshd) {
+ /* shd should ignore AFR_TA_DOM_NOTIFY release requests. */
+ return;
+ }
+ LOCK(&priv->lock);
+ {
+ if (priv->release_ta_notify_dom_lock == _gf_true) {
+ /* Ignore multiple release requests from shds.*/
+ UNLOCK(&priv->lock);
+ return;
+ }
+ priv->release_ta_notify_dom_lock = _gf_true;
+ inmem_count = priv->ta_in_mem_txn_count;
+ onwire_count = priv->ta_on_wire_txn_count;
+ }
+ UNLOCK(&priv->lock);
+ if (inmem_count || onwire_count)
+ /* lock release will happen in txn code path after
+ * in-memory or on-wire txns are over.*/
+ return;
- return 0;
+ afr_ta_lock_release_synctask(this);
}
-
-/**
- * find_child_index - find the child's index in the array of subvolumes
- * @this: AFR
- * @child: child
- */
-
-static int
-find_child_index (xlator_t *this, xlator_t *child)
+static void
+afr_handle_upcall_event(xlator_t *this, struct gf_upcall *upcall)
{
- afr_private_t *priv = NULL;
- int i = -1;
+ struct gf_upcall_cache_invalidation *up_ci = NULL;
+ afr_private_t *priv = this->private;
+ inode_t *inode = NULL;
+ inode_table_t *itable = NULL;
+ int i = 0;
+
+ switch (upcall->event_type) {
+ case GF_UPCALL_INODELK_CONTENTION:
+ afr_handle_inodelk_contention(this, upcall);
+ break;
+ case GF_UPCALL_CACHE_INVALIDATION:
+ up_ci = (struct gf_upcall_cache_invalidation *)upcall->data;
+
+ /* Since md-cache will be aggressively filtering
+ * lookups, the stale read issue will be more
+ * pronounced. Hence when a pending xattr is set notify
+ * all the md-cache clients to invalidate the existing
+ * stat cache and send the lookup next time */
+ if (!up_ci->dict)
+ break;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!dict_get(up_ci->dict, priv->pending_key[i]))
+ continue;
+ up_ci->flags |= UP_INVAL_ATTR;
+ itable = ((xlator_t *)this->graph->top)->itable;
+ /*Internal processes may not have itable for
+ *top xlator*/
+ if (itable)
+ inode = inode_find(itable, upcall->gfid);
+ if (inode)
+ afr_inode_need_refresh_set(inode, this);
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+}
- priv = this->private;
+int32_t
+afr_notify(xlator_t *this, int32_t event, void *data, void *data2)
+{
+ afr_private_t *priv = NULL;
+ xlator_t *child_xlator = NULL;
+ int i = -1;
+ int propagate = 0;
+ int had_heard_from_all = 0;
+ int have_heard_from_all = 0;
+ int idx = -1;
+ int ret = -1;
+ int call_psh = 0;
+ int up_child = -1;
+ dict_t *input = NULL;
+ dict_t *output = NULL;
+ gf_boolean_t had_quorum = _gf_false;
+ gf_boolean_t has_quorum = _gf_false;
+ int64_t halo_max_latency_msec = 0;
+ int64_t child_latency_msec = -1;
+
+ child_xlator = (xlator_t *)data;
+
+ priv = this->private;
+
+ if (!priv)
+ return 0;
- for (i = 0; i < priv->child_count; i++) {
- if ((xlator_t *) child == priv->children[i])
- break;
+ /*
+ * We need to reset this in case children come up in "staggered"
+ * fashion, so that we discover a late-arriving local subvolume. Note
+ * that we could end up issuing N lookups to the first subvolume, and
+ * O(N^2) overall, but N is small for AFR so it shouldn't be an issue.
+ */
+ priv->did_discovery = _gf_false;
+
+ /* parent xlators don't need to know about every child_up, child_down
+ * because of afr ha. If all subvolumes go down, child_down has
+ * to be triggered. In that state when 1 subvolume comes up child_up
+ * needs to be triggered. dht optimizes revalidate lookup by sending
+ * it only to one of its subvolumes. When child up/down happens
+ * for afr's subvolumes dht should be notified by child_modified. The
+ * subsequent revalidate lookup happens on all the dht's subvolumes
+ * which triggers afr self-heals if any.
+ */
+ idx = afr_find_child_index(this, child_xlator);
+ if (idx < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_CHILD_UP,
+ "Received child_up from invalid subvolume");
+ goto out;
+ }
+
+ had_quorum = priv->quorum_count &&
+ afr_has_quorum(priv->child_up, this, NULL);
+ if (event == GF_EVENT_CHILD_PING) {
+ child_latency_msec = (int64_t)(uintptr_t)data2;
+ if (priv->halo_enabled) {
+ halo_max_latency_msec = afr_get_halo_latency(this);
+
+ /* Calculates the child latency and sets event
+ */
+ LOCK(&priv->lock);
+ {
+ __afr_handle_ping_event(this, child_xlator, idx,
+ halo_max_latency_msec, &event,
+ child_latency_msec);
+ }
+ UNLOCK(&priv->lock);
+ } else {
+ LOCK(&priv->lock);
+ {
+ priv->child_latency[idx] = child_latency_msec;
+ }
+ UNLOCK(&priv->lock);
}
+ }
- return i;
-}
-
-int32_t
-afr_notify (xlator_t *this, int32_t event,
- void *data, void *data2)
-{
- afr_private_t *priv = NULL;
- int i = -1;
- int up_children = 0;
- int down_children = 0;
- int propagate = 0;
- int had_heard_from_all = 0;
- int have_heard_from_all = 0;
- int idx = -1;
- int ret = -1;
- int call_psh = 0;
- int up_child = -1;
- dict_t *input = NULL;
- dict_t *output = NULL;
- gf_boolean_t had_quorum = _gf_false;
- gf_boolean_t has_quorum = _gf_false;
-
- priv = this->private;
-
- if (!priv)
- return 0;
-
- /*
- * We need to reset this in case children come up in "staggered"
- * fashion, so that we discover a late-arriving local subvolume. Note
- * that we could end up issuing N lookups to the first subvolume, and
- * O(N^2) overall, but N is small for AFR so it shouldn't be an issue.
+ if (event == GF_EVENT_CHILD_PING) {
+ /* This is the only xlator that handles PING, no reason to
+ * propagate.
*/
- priv->did_discovery = _gf_false;
+ goto out;
+ }
- had_heard_from_all = 1;
- for (i = 0; i < priv->child_count; i++) {
- if (!priv->last_event[i]) {
- had_heard_from_all = 0;
- }
+ if (event == GF_EVENT_TRANSLATOR_OP) {
+ LOCK(&priv->lock);
+ {
+ had_heard_from_all = __get_heard_from_all_status(this);
}
+ UNLOCK(&priv->lock);
- /* parent xlators dont need to know about every child_up, child_down
- * because of afr ha. If all subvolumes go down, child_down has
- * to be triggered. In that state when 1 subvolume comes up child_up
- * needs to be triggered. dht optimizes revalidate lookup by sending
- * it only to one of its subvolumes. When child up/down happens
- * for afr's subvolumes dht should be notified by child_modified. The
- * subsequent revalidate lookup happens on all the dht's subvolumes
- * which triggers afr self-heals if any.
- */
- idx = find_child_index (this, data);
- if (idx < 0) {
- gf_msg (this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_CHILD_UP,
- "Received child_up from invalid subvolume");
- goto out;
+ if (!had_heard_from_all) {
+ ret = -1;
+ } else {
+ input = data;
+ output = data2;
+ ret = afr_xl_op(this, input, output);
}
+ goto out;
+ }
+
+ if (event == GF_EVENT_UPCALL) {
+ afr_handle_upcall_event(this, data);
+ }
- had_quorum = priv->quorum_count && afr_has_quorum (priv->child_up,
- this);
+ LOCK(&priv->lock);
+ {
+ had_heard_from_all = __get_heard_from_all_status(this);
switch (event) {
- case GF_EVENT_CHILD_UP:
- LOCK (&priv->lock);
- {
- /*
- * This only really counts if the child was never up
- * (value = -1) or had been down (value = 0). See
- * comment at GF_EVENT_CHILD_DOWN for a more detailed
- * explanation.
- */
- if (priv->child_up[idx] != 1) {
- priv->up_count++;
- priv->event_generation++;
- }
- priv->child_up[idx] = 1;
-
- call_psh = 1;
- up_child = idx;
- for (i = 0; i < priv->child_count; i++)
- if (priv->child_up[i] == 1)
- up_children++;
- if (up_children == 1) {
- gf_msg (this->name, GF_LOG_INFO, 0,
- AFR_MSG_SUBVOL_UP,
- "Subvolume '%s' came back up; "
- "going online.", ((xlator_t *)data)->name);
- } else {
- event = GF_EVENT_CHILD_MODIFIED;
- }
-
- priv->last_event[idx] = event;
+ case GF_EVENT_PARENT_UP:
+ __afr_launch_notify_timer(this, priv);
+ propagate = 1;
+ break;
+ case GF_EVENT_CHILD_UP:
+ if (priv->thin_arbiter_count &&
+ (idx == AFR_CHILD_THIN_ARBITER)) {
+ priv->ta_child_up = 1;
+ priv->ta_event_gen++;
+ break;
}
- UNLOCK (&priv->lock);
-
+ __afr_handle_child_up_event(this, child_xlator, idx,
+ child_latency_msec, &event,
+ &call_psh, &up_child);
+ __afr_lock_heal_synctask(this, priv, idx);
break;
- case GF_EVENT_CHILD_DOWN:
- LOCK (&priv->lock);
- {
- /*
- * If a brick is down when we start, we'll get a
- * CHILD_DOWN to indicate its initial state. There
- * was never a CHILD_UP in this case, so if we
- * increment "down_count" the difference between than
- * and "up_count" will no longer be the number of
- * children that are currently up. This has serious
- * implications e.g. for quorum enforcement, so we
- * don't increment these values unless the event
- * represents an actual state transition between "up"
- * (value = 1) and anything else.
- */
- if (priv->child_up[idx] == 1) {
- priv->down_count++;
- priv->event_generation++;
- }
- priv->child_up[idx] = 0;
-
- for (i = 0; i < priv->child_count; i++)
- if (priv->child_up[i] == 0)
- down_children++;
- if (down_children == priv->child_count) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- AFR_MSG_ALL_SUBVOLS_DOWN,
- "All subvolumes are down. Going offline "
- "until atleast one of them comes back up.");
- } else {
- event = GF_EVENT_CHILD_MODIFIED;
- }
-
- priv->last_event[idx] = event;
+ case GF_EVENT_CHILD_DOWN:
+ if (priv->thin_arbiter_count &&
+ (idx == AFR_CHILD_THIN_ARBITER)) {
+ priv->ta_child_up = 0;
+ priv->ta_event_gen++;
+ afr_ta_locked_priv_invalidate(priv);
+ break;
}
- UNLOCK (&priv->lock);
-
+ __afr_handle_child_down_event(this, child_xlator, idx,
+ child_latency_msec, &event,
+ &call_psh, &up_child);
+ __afr_mark_pending_lk_heal(this, priv, idx);
break;
- case GF_EVENT_CHILD_CONNECTING:
- LOCK (&priv->lock);
- {
- priv->last_event[idx] = event;
- }
- UNLOCK (&priv->lock);
+ case GF_EVENT_CHILD_CONNECTING:
+ priv->last_event[idx] = event;
break;
- case GF_EVENT_TRANSLATOR_OP:
- input = data;
- output = data2;
- if (!had_heard_from_all) {
- ret = -1;
- goto out;
- }
- ret = afr_xl_op (this, input, output);
- goto out;
+ case GF_EVENT_SOME_DESCENDENT_DOWN:
+ priv->last_event[idx] = event;
break;
-
- default:
+ default:
propagate = 1;
break;
}
+ have_heard_from_all = __get_heard_from_all_status(this);
+ if (!had_heard_from_all && have_heard_from_all) {
+ if (priv->timer) {
+ gf_timer_call_cancel(this->ctx, priv->timer);
+ priv->timer = NULL;
+ }
+ /* This is the first event which completes aggregation
+ of events from all subvolumes. If at least one subvol
+ had come up, propagate CHILD_UP, but only this time
+ */
+ event = GF_EVENT_CHILD_DOWN;
+ for (i = 0; i < priv->child_count; i++) {
+ if (priv->last_event[i] == GF_EVENT_CHILD_UP) {
+ event = GF_EVENT_CHILD_UP;
+ break;
+ }
- if (priv->quorum_count) {
- has_quorum = afr_has_quorum (priv->child_up, this);
- if (!had_quorum && has_quorum)
- gf_msg (this->name, GF_LOG_INFO, 0, AFR_MSG_QUORUM_MET,
- "Client-quorum is met");
- if (had_quorum && !has_quorum)
- gf_msg (this->name, GF_LOG_WARNING, 0,
- AFR_MSG_QUORUM_FAIL,
- "Client-quorum is not met");
+ if (priv->last_event[i] == GF_EVENT_CHILD_CONNECTING) {
+ event = GF_EVENT_CHILD_CONNECTING;
+ /* continue to check other events for CHILD_UP */
+ }
+ }
+ }
+ }
+ UNLOCK(&priv->lock);
+
+ if (priv->quorum_count) {
+ has_quorum = afr_has_quorum(priv->child_up, this, NULL);
+ if (!had_quorum && has_quorum) {
+ gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_QUORUM_MET,
+ "Client-quorum is met");
+ gf_event(EVENT_AFR_QUORUM_MET, "client-pid=%d; subvol=%s",
+ this->ctx->cmd_args.client_pid, this->name);
+ }
+ if (had_quorum && !has_quorum) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_QUORUM_FAIL,
+ "Client-quorum is not met");
+ gf_event(EVENT_AFR_QUORUM_FAIL, "client-pid=%d; subvol=%s",
+ this->ctx->cmd_args.client_pid, this->name);
+ }
+ }
+
+ /* if all subvols have reported status, no need to hide anything
+ or wait for anything else. Just propagate blindly */
+ if (have_heard_from_all)
+ propagate = 1;
+
+ ret = 0;
+ if (propagate)
+ ret = default_notify(this, event, data);
+
+ if ((!had_heard_from_all) || call_psh) {
+ /* Launch self-heal on all local subvolumes if:
+ * a) We have_heard_from_all for the first time
+ * b) Already heard from everyone, but we now got a child-up
+ * event.
+ */
+ if (have_heard_from_all) {
+ afr_selfheal_childup(this, priv);
}
+ }
+out:
+ return ret;
+}
- /* have all subvolumes reported status once by now? */
- have_heard_from_all = 1;
- for (i = 0; i < priv->child_count; i++) {
- if (!priv->last_event[i])
- have_heard_from_all = 0;
- }
+int
+afr_local_init(afr_local_t *local, afr_private_t *priv, int32_t *op_errno)
+{
+ int __ret = -1;
+ local->op_ret = -1;
+ local->op_errno = EUCLEAN;
+
+ __ret = syncbarrier_init(&local->barrier);
+ if (__ret) {
+ if (op_errno)
+ *op_errno = __ret;
+ goto out;
+ }
+
+ local->child_up = GF_MALLOC(priv->child_count * sizeof(*local->child_up),
+ gf_afr_mt_char);
+ if (!local->child_up) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ memcpy(local->child_up, priv->child_up,
+ sizeof(*local->child_up) * priv->child_count);
+ local->call_count = AFR_COUNT(local->child_up, priv->child_count);
+ if (local->call_count == 0) {
+ gf_msg(THIS->name, GF_LOG_INFO, 0, AFR_MSG_SUBVOLS_DOWN,
+ "no subvolumes up");
+ if (op_errno)
+ *op_errno = ENOTCONN;
+ goto out;
+ }
+
+ local->event_generation = priv->event_generation;
+
+ local->read_attempted = GF_CALLOC(priv->child_count, sizeof(char),
+ gf_afr_mt_char);
+ if (!local->read_attempted) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ local->readable = GF_CALLOC(priv->child_count, sizeof(char),
+ gf_afr_mt_char);
+ if (!local->readable) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ local->readable2 = GF_CALLOC(priv->child_count, sizeof(char),
+ gf_afr_mt_char);
+ if (!local->readable2) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ local->read_subvol = -1;
+
+ local->replies = GF_CALLOC(priv->child_count, sizeof(*local->replies),
+ gf_afr_mt_reply_t);
+ if (!local->replies) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ local->need_full_crawl = _gf_false;
+ if (priv->thin_arbiter_count) {
+ local->ta_child_up = priv->ta_child_up;
+ local->ta_failed_subvol = AFR_CHILD_UNKNOWN;
+ local->read_txn_query_child = AFR_CHILD_UNKNOWN;
+ local->ta_event_gen = priv->ta_event_gen;
+ local->fop_state = TA_SUCCESS;
+ }
+ local->is_new_entry = _gf_false;
+
+ INIT_LIST_HEAD(&local->healer);
+ return 0;
+out:
+ return -1;
+}
- /* if all subvols have reported status, no need to hide anything
- or wait for anything else. Just propagate blindly */
- if (have_heard_from_all)
- propagate = 1;
+int
+afr_internal_lock_init(afr_internal_lock_t *lk, size_t child_count)
+{
+ int ret = -ENOMEM;
- if (!had_heard_from_all && have_heard_from_all) {
- /* This is the first event which completes aggregation
- of events from all subvolumes. If at least one subvol
- had come up, propagate CHILD_UP, but only this time
- */
- event = GF_EVENT_CHILD_DOWN;
-
- LOCK (&priv->lock);
- {
- up_children = AFR_COUNT (priv->child_up, priv->child_count);
- for (i = 0; i < priv->child_count; i++) {
- if (priv->last_event[i] == GF_EVENT_CHILD_UP) {
- event = GF_EVENT_CHILD_UP;
- break;
- }
-
- if (priv->last_event[i] ==
- GF_EVENT_CHILD_CONNECTING) {
- event = GF_EVENT_CHILD_CONNECTING;
- /* continue to check other events for CHILD_UP */
- }
- }
- }
- UNLOCK (&priv->lock);
- }
+ lk->lower_locked_nodes = GF_CALLOC(sizeof(*lk->lower_locked_nodes),
+ child_count, gf_afr_mt_char);
+ if (NULL == lk->lower_locked_nodes)
+ goto out;
- ret = 0;
- if (propagate)
- ret = default_notify (this, event, data);
+ lk->lock_op_ret = -1;
+ lk->lock_op_errno = EUCLEAN;
- if (!had_heard_from_all && have_heard_from_all && priv->shd.iamshd) {
- /*
- * Since self-heal is supposed to be launched only after
- * the responses from all the bricks are collected,
- * launch self-heals now on all up subvols.
- */
- for (i = 0; i < priv->child_count; i++)
- if (priv->child_up[i])
- afr_selfheal_childup (this, i);
- } else if (have_heard_from_all && call_psh && priv->shd.iamshd) {
- /*
- * Already heard from everyone. Just launch heal on now up
- * subvolume.
- */
- afr_selfheal_childup (this, up_child);
- }
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
-int
-afr_local_init (afr_local_t *local, afr_private_t *priv, int32_t *op_errno)
+void
+afr_matrix_cleanup(int32_t **matrix, unsigned int m)
{
- local->op_ret = -1;
- local->op_errno = EUCLEAN;
+ int i = 0;
- syncbarrier_init (&local->barrier);
+ if (!matrix)
+ goto out;
+ for (i = 0; i < m; i++) {
+ GF_FREE(matrix[i]);
+ }
- local->child_up = GF_CALLOC (priv->child_count,
- sizeof (*local->child_up),
- gf_afr_mt_char);
- if (!local->child_up) {
- if (op_errno)
- *op_errno = ENOMEM;
- goto out;
- }
+ GF_FREE(matrix);
+out:
+ return;
+}
- memcpy (local->child_up, priv->child_up,
- sizeof (*local->child_up) * priv->child_count);
- local->call_count = AFR_COUNT (local->child_up, priv->child_count);
- if (local->call_count == 0) {
- gf_log (THIS->name, GF_LOG_INFO, "no subvolumes up");
- if (op_errno)
- *op_errno = ENOTCONN;
- goto out;
- }
- local->event_generation = priv->event_generation;
-
- local->read_attempted = GF_CALLOC (priv->child_count, sizeof (char),
- gf_afr_mt_char);
- if (!local->read_attempted) {
- if (op_errno)
- *op_errno = ENOMEM;
- goto out;
- }
-
- local->readable = GF_CALLOC (priv->child_count, sizeof (char),
- gf_afr_mt_char);
- if (!local->readable) {
- if (op_errno)
- *op_errno = ENOMEM;
- goto out;
- }
-
- local->replies = GF_CALLOC(priv->child_count, sizeof(*local->replies),
- gf_afr_mt_reply_t);
- if (!local->replies) {
- if (op_errno)
- *op_errno = ENOMEM;
- goto out;
- }
-
- return 0;
+int32_t **
+afr_matrix_create(unsigned int m, unsigned int n)
+{
+ int32_t **matrix = NULL;
+ int i = 0;
+
+ matrix = GF_CALLOC(sizeof(*matrix), m, gf_afr_mt_int32_t);
+ if (!matrix)
+ goto out;
+
+ for (i = 0; i < m; i++) {
+ matrix[i] = GF_CALLOC(sizeof(*matrix[i]), n, gf_afr_mt_int32_t);
+ if (!matrix[i])
+ goto out;
+ }
+ return matrix;
out:
- return -1;
+ afr_matrix_cleanup(matrix, m);
+ return NULL;
}
int
-afr_internal_lock_init (afr_internal_lock_t *lk, size_t child_count,
- transaction_lk_type_t lk_type)
+afr_transaction_local_init(afr_local_t *local, xlator_t *this)
{
- int ret = -ENOMEM;
+ int ret = -ENOMEM;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ INIT_LIST_HEAD(&local->transaction.wait_list);
+ INIT_LIST_HEAD(&local->transaction.owner_list);
+ INIT_LIST_HEAD(&local->ta_waitq);
+ INIT_LIST_HEAD(&local->ta_onwireq);
+ ret = afr_internal_lock_init(&local->internal_lock, priv->child_count);
+ if (ret < 0)
+ goto out;
+
+ ret = -ENOMEM;
+ local->pre_op_compat = priv->pre_op_compat;
+
+ local->transaction.pre_op = GF_CALLOC(sizeof(*local->transaction.pre_op),
+ priv->child_count, gf_afr_mt_char);
+ if (!local->transaction.pre_op)
+ goto out;
+
+ local->transaction.changelog_xdata = GF_CALLOC(
+ sizeof(*local->transaction.changelog_xdata), priv->child_count,
+ gf_afr_mt_dict_t);
+ if (!local->transaction.changelog_xdata)
+ goto out;
+
+ if (priv->arbiter_count == 1) {
+ local->transaction.pre_op_sources = GF_CALLOC(
+ sizeof(*local->transaction.pre_op_sources), priv->child_count,
+ gf_afr_mt_char);
+ if (!local->transaction.pre_op_sources)
+ goto out;
+ }
+
+ local->transaction.failed_subvols = GF_CALLOC(
+ sizeof(*local->transaction.failed_subvols), priv->child_count,
+ gf_afr_mt_char);
+ if (!local->transaction.failed_subvols)
+ goto out;
+
+ local->pending = afr_matrix_create(priv->child_count, AFR_NUM_CHANGE_LOGS);
+ if (!local->pending)
+ goto out;
+
+ ret = 0;
+out:
+ return ret;
+}
- lk->locked_nodes = GF_CALLOC (sizeof (*lk->locked_nodes),
- child_count, gf_afr_mt_char);
- if (NULL == lk->locked_nodes)
- goto out;
+void
+afr_set_low_priority(call_frame_t *frame)
+{
+ frame->root->pid = LOW_PRIO_PROC_PID;
+}
- lk->lower_locked_nodes = GF_CALLOC (sizeof (*lk->lower_locked_nodes),
- child_count, gf_afr_mt_char);
- if (NULL == lk->lower_locked_nodes)
- goto out;
+void
+afr_priv_destroy(afr_private_t *priv)
+{
+ int i = 0;
+ int child_count = -1;
+
+ if (!priv)
+ goto out;
+
+ GF_FREE(priv->sh_domain);
+ GF_FREE(priv->last_event);
+
+ child_count = priv->child_count;
+ if (priv->thin_arbiter_count) {
+ child_count++;
+ }
+ if (priv->pending_key) {
+ for (i = 0; i < child_count; i++)
+ GF_FREE(priv->pending_key[i]);
+ }
+
+ GF_FREE(priv->pending_reads);
+ GF_FREE(priv->local);
+ GF_FREE(priv->pending_key);
+ GF_FREE(priv->children);
+ GF_FREE(priv->anon_inode);
+ GF_FREE(priv->child_up);
+ GF_FREE(priv->halo_child_up);
+ GF_FREE(priv->child_latency);
+ LOCK_DESTROY(&priv->lock);
+
+ GF_FREE(priv);
+out:
+ return;
+}
- lk->lock_op_ret = -1;
- lk->lock_op_errno = EUCLEAN;
- lk->transaction_lk_type = lk_type;
+int **
+afr_mark_pending_changelog(afr_private_t *priv, unsigned char *pending,
+ dict_t *xattr, ia_type_t iat)
+{
+ int i = 0;
+ int **changelog = NULL;
+ int idx = -1;
+ int m_idx = 0;
+ int d_idx = 0;
+ int ret = 0;
+
+ m_idx = afr_index_for_transaction_type(AFR_METADATA_TRANSACTION);
+ d_idx = afr_index_for_transaction_type(AFR_DATA_TRANSACTION);
+
+ idx = afr_index_from_ia_type(iat);
+
+ changelog = afr_matrix_create(priv->child_count, AFR_NUM_CHANGE_LOGS);
+ if (!changelog)
+ goto out;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!pending[i])
+ continue;
+
+ changelog[i][m_idx] = hton32(1);
+ if (idx != -1)
+ changelog[i][idx] = hton32(1);
+ /* If the newentry marking is on a newly created directory,
+ * then mark it with the full-heal indicator.
+ */
+ if ((IA_ISDIR(iat)) && (priv->esh_granular))
+ changelog[i][d_idx] = hton32(1);
+ }
+ ret = afr_set_pending_dict(priv, xattr, changelog);
+ if (ret < 0) {
+ afr_matrix_cleanup(changelog, priv->child_count);
+ return NULL;
+ }
+out:
+ return changelog;
+}
- ret = 0;
+static dict_t *
+afr_set_heal_info(char *status)
+{
+ dict_t *dict = NULL;
+ int ret = -1;
+
+ dict = dict_new();
+ if (!dict) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = dict_set_dynstr_sizen(dict, "heal-info", status);
+ if (ret)
+ gf_msg("", GF_LOG_WARNING, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Failed to set heal-info key to "
+ "%s",
+ status);
out:
- return ret;
+ /* Any error other than EINVAL, dict_set_dynstr frees status */
+ if (ret == -ENOMEM || ret == -EINVAL) {
+ GF_FREE(status);
+ }
+
+ if (ret && dict) {
+ dict_unref(dict);
+ dict = NULL;
+ }
+ return dict;
}
-void
-afr_matrix_cleanup (int32_t **matrix, unsigned int m)
+static gf_boolean_t
+afr_is_dirty_count_non_unary_for_txn(xlator_t *this, struct afr_reply *replies,
+ afr_transaction_type type)
{
- int i = 0;
+ afr_private_t *priv = this->private;
+ int *dirty = alloca0(priv->child_count * sizeof(int));
+ int i = 0;
- if (!matrix)
- goto out;
- for (i = 0; i < m; i++) {
- GF_FREE (matrix[i]);
- }
+ afr_selfheal_extract_xattr(this, replies, type, dirty, NULL);
+ for (i = 0; i < priv->child_count; i++) {
+ if (dirty[i] > 1)
+ return _gf_true;
+ }
- GF_FREE (matrix);
-out:
- return;
+ return _gf_false;
}
-int32_t**
-afr_matrix_create (unsigned int m, unsigned int n)
+static gf_boolean_t
+afr_is_dirty_count_non_unary(xlator_t *this, struct afr_reply *replies,
+ ia_type_t ia_type)
{
- int32_t **matrix = NULL;
- int i = 0;
+ gf_boolean_t data_chk = _gf_false;
+ gf_boolean_t mdata_chk = _gf_false;
+ gf_boolean_t entry_chk = _gf_false;
+
+ switch (ia_type) {
+ case IA_IFDIR:
+ mdata_chk = _gf_true;
+ entry_chk = _gf_true;
+ break;
+ case IA_IFREG:
+ mdata_chk = _gf_true;
+ data_chk = _gf_true;
+ break;
+ default:
+ /*IA_IFBLK, IA_IFCHR, IA_IFLNK, IA_IFIFO, IA_IFSOCK*/
+ mdata_chk = _gf_true;
+ break;
+ }
- matrix = GF_CALLOC (sizeof (*matrix), m, gf_afr_mt_int32_t);
- if (!matrix)
- goto out;
+ if (data_chk && afr_is_dirty_count_non_unary_for_txn(
+ this, replies, AFR_DATA_TRANSACTION)) {
+ return _gf_true;
+ } else if (mdata_chk && afr_is_dirty_count_non_unary_for_txn(
+ this, replies, AFR_METADATA_TRANSACTION)) {
+ return _gf_true;
+ } else if (entry_chk && afr_is_dirty_count_non_unary_for_txn(
+ this, replies, AFR_ENTRY_TRANSACTION)) {
+ return _gf_true;
+ }
+
+ return _gf_false;
+}
- for (i = 0; i < m; i++) {
- matrix[i] = GF_CALLOC (sizeof (*matrix[i]), n,
- gf_afr_mt_int32_t);
- if (!matrix[i])
- goto out;
+static int
+afr_update_heal_status(xlator_t *this, struct afr_reply *replies,
+ ia_type_t ia_type, gf_boolean_t *esh, gf_boolean_t *dsh,
+ gf_boolean_t *msh, unsigned char pending)
+{
+ int ret = -1;
+ GF_UNUSED int ret1 = 0;
+ int i = 0;
+ int io_domain_lk_count = 0;
+ int shd_domain_lk_count = 0;
+ afr_private_t *priv = NULL;
+ char *key1 = NULL;
+ char *key2 = NULL;
+
+ priv = this->private;
+ key1 = alloca0(strlen(GLUSTERFS_INODELK_DOM_PREFIX) + 2 +
+ strlen(this->name));
+ key2 = alloca0(strlen(GLUSTERFS_INODELK_DOM_PREFIX) + 2 +
+ strlen(priv->sh_domain));
+ sprintf(key1, "%s:%s", GLUSTERFS_INODELK_DOM_PREFIX, this->name);
+ sprintf(key2, "%s:%s", GLUSTERFS_INODELK_DOM_PREFIX, priv->sh_domain);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if ((replies[i].valid != 1) || (replies[i].op_ret != 0))
+ continue;
+ if (!io_domain_lk_count) {
+ ret1 = dict_get_int32(replies[i].xdata, key1, &io_domain_lk_count);
+ }
+ if (!shd_domain_lk_count) {
+ ret1 = dict_get_int32(replies[i].xdata, key2, &shd_domain_lk_count);
+ }
+ }
+
+ if (!pending) {
+ if ((afr_is_dirty_count_non_unary(this, replies, ia_type)) ||
+ (!io_domain_lk_count)) {
+ /* Needs heal. */
+ ret = 0;
+ } else {
+ /* No heal needed. */
+ *dsh = *esh = *msh = 0;
}
- return matrix;
-out:
- afr_matrix_cleanup (matrix, m);
- return NULL;
+ } else {
+ if (shd_domain_lk_count) {
+ ret = -EAGAIN; /*For 'possibly-healing'. */
+ } else {
+ ret = 0; /*needs heal. Just set a non -ve value so that it is
+ assumed as the source index.*/
+ }
+ }
+ return ret;
}
+/*return EIO, EAGAIN or pending*/
int
-afr_inodelk_init (afr_inodelk_t *lk, char *dom, size_t child_count)
+afr_lockless_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid,
+ inode_t **inode, gf_boolean_t *entry_selfheal,
+ gf_boolean_t *data_selfheal,
+ gf_boolean_t *metadata_selfheal, unsigned char *pending)
{
- int ret = -ENOMEM;
+ int ret = -1;
+ int i = 0;
+ afr_private_t *priv = NULL;
+ struct afr_reply *replies = NULL;
+ gf_boolean_t dsh = _gf_false;
+ gf_boolean_t msh = _gf_false;
+ gf_boolean_t esh = _gf_false;
+ unsigned char *sources = NULL;
+ unsigned char *sinks = NULL;
+ unsigned char *valid_on = NULL;
+ uint64_t *witness = NULL;
+
+ priv = this->private;
+ replies = alloca0(sizeof(*replies) * priv->child_count);
+ sources = alloca0(sizeof(*sources) * priv->child_count);
+ sinks = alloca0(sizeof(*sinks) * priv->child_count);
+ witness = alloca0(sizeof(*witness) * priv->child_count);
+ valid_on = alloca0(sizeof(*valid_on) * priv->child_count);
+
+ ret = afr_selfheal_unlocked_inspect(frame, this, gfid, inode, &dsh, &msh,
+ &esh, replies);
+ if (ret)
+ goto out;
+ for (i = 0; i < priv->child_count; i++) {
+ if (replies[i].valid && replies[i].op_ret == 0) {
+ valid_on[i] = 1;
+ }
+ }
+ if (msh) {
+ ret = afr_selfheal_find_direction(frame, this, replies,
+ AFR_METADATA_TRANSACTION, valid_on,
+ sources, sinks, witness, pending);
+ if (*pending & PFLAG_SBRAIN)
+ ret = -EIO;
+ if (ret)
+ goto out;
+ }
+ if (dsh) {
+ ret = afr_selfheal_find_direction(frame, this, replies,
+ AFR_DATA_TRANSACTION, valid_on,
+ sources, sinks, witness, pending);
+ if (*pending & PFLAG_SBRAIN)
+ ret = -EIO;
+ if (ret)
+ goto out;
+ }
+ if (esh) {
+ ret = afr_selfheal_find_direction(frame, this, replies,
+ AFR_ENTRY_TRANSACTION, valid_on,
+ sources, sinks, witness, pending);
+ if (*pending & PFLAG_SBRAIN)
+ ret = -EIO;
+ if (ret)
+ goto out;
+ }
- lk->domain = dom;
- lk->locked_nodes = GF_CALLOC (sizeof (*lk->locked_nodes),
- child_count, gf_afr_mt_char);
- if (NULL == lk->locked_nodes)
- goto out;
- ret = 0;
+ ret = afr_update_heal_status(this, replies, (*inode)->ia_type, &esh, &dsh,
+ &msh, *pending);
out:
- return ret;
+ *data_selfheal = dsh;
+ *entry_selfheal = esh;
+ *metadata_selfheal = msh;
+ if (replies)
+ afr_replies_wipe(replies, priv->child_count);
+ return ret;
}
int
-afr_transaction_local_init (afr_local_t *local, xlator_t *this)
+afr_get_heal_info(call_frame_t *frame, xlator_t *this, loc_t *loc)
{
- int child_up_count = 0;
- int ret = -ENOMEM;
- afr_private_t *priv = NULL;
-
- priv = this->private;
- ret = afr_internal_lock_init (&local->internal_lock, priv->child_count,
- AFR_TRANSACTION_LK);
- if (ret < 0)
+ gf_boolean_t data_selfheal = _gf_false;
+ gf_boolean_t metadata_selfheal = _gf_false;
+ gf_boolean_t entry_selfheal = _gf_false;
+ unsigned char pending = 0;
+ dict_t *dict = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
+ inode_t *inode = NULL;
+ char *substr = NULL;
+ char *status = NULL;
+ call_frame_t *heal_frame = NULL;
+ afr_local_t *heal_local = NULL;
+
+ /*Use frame with lk-owner set*/
+ heal_frame = afr_frame_create(frame->this, &op_errno);
+ if (!heal_frame) {
+ ret = -1;
+ goto out;
+ }
+ heal_local = heal_frame->local;
+ heal_frame->local = frame->local;
+
+ ret = afr_lockless_inspect(heal_frame, this, loc->gfid, &inode,
+ &entry_selfheal, &data_selfheal,
+ &metadata_selfheal, &pending);
+
+ if (ret == -ENOMEM) {
+ ret = -1;
+ goto out;
+ }
+
+ if (pending & PFLAG_PENDING) {
+ gf_asprintf(&substr, "-pending");
+ if (!substr)
+ goto out;
+ }
+
+ if (ret == -EIO) {
+ ret = gf_asprintf(&status, "split-brain%s", substr ? substr : "");
+ if (ret < 0) {
+ goto out;
+ }
+ dict = afr_set_heal_info(status);
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+ } else if (ret == -EAGAIN) {
+ ret = gf_asprintf(&status, "possibly-healing%s", substr ? substr : "");
+ if (ret < 0) {
+ goto out;
+ }
+ dict = afr_set_heal_info(status);
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+ } else if (ret >= 0) {
+ /* value of ret = source index
+ * so ret >= 0 and at least one of the 3 booleans set to
+ * true means a source is identified; heal is required.
+ */
+ if (!data_selfheal && !entry_selfheal && !metadata_selfheal) {
+ status = gf_strdup("no-heal");
+ if (!status) {
+ ret = -1;
goto out;
-
- if ((local->transaction.type == AFR_DATA_TRANSACTION) ||
- (local->transaction.type == AFR_METADATA_TRANSACTION)) {
- ret = afr_inodelk_init (&local->internal_lock.inodelk[0],
- this->name, priv->child_count);
- if (ret < 0)
- goto out;
+ }
+ dict = afr_set_heal_info(status);
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+ } else {
+ ret = gf_asprintf(&status, "heal%s", substr ? substr : "");
+ if (ret < 0) {
+ goto out;
+ }
+ dict = afr_set_heal_info(status);
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+ }
+ } else if (ret < 0) {
+ /* Apart from above checked -ve ret values, there are
+ * other possible ret values like ENOTCONN
+ * (returned when number of valid replies received are
+ * less than 2)
+ * in which case heal is required when one of the
+ * selfheal booleans is set.
+ */
+ if (data_selfheal || entry_selfheal || metadata_selfheal) {
+ ret = gf_asprintf(&status, "heal%s", substr ? substr : "");
+ if (ret < 0) {
+ goto out;
+ }
+ dict = afr_set_heal_info(status);
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
}
+ }
- ret = -ENOMEM;
- child_up_count = AFR_COUNT (local->child_up, priv->child_count);
- if (priv->optimistic_change_log && child_up_count == priv->child_count)
- local->optimistic_change_log = 1;
+ ret = 0;
+ op_errno = 0;
+
+out:
+ if (heal_frame) {
+ heal_frame->local = heal_local;
+ AFR_STACK_DESTROY(heal_frame);
+ }
+ AFR_STACK_UNWIND(getxattr, frame, ret, op_errno, dict, NULL);
+ if (dict)
+ dict_unref(dict);
+ if (inode)
+ inode_unref(inode);
+ GF_FREE(substr);
+ return ret;
+}
- local->pre_op_compat = priv->pre_op_compat;
+int
+_afr_is_split_brain(call_frame_t *frame, xlator_t *this,
+ struct afr_reply *replies, afr_transaction_type type,
+ gf_boolean_t *spb)
+{
+ afr_private_t *priv = NULL;
+ uint64_t *witness = NULL;
+ unsigned char *sources = NULL;
+ unsigned char *sinks = NULL;
+ int sources_count = 0;
+ int ret = 0;
+
+ priv = this->private;
+
+ sources = alloca0(priv->child_count);
+ sinks = alloca0(priv->child_count);
+ witness = alloca0(priv->child_count * sizeof(*witness));
+
+ ret = afr_selfheal_find_direction(frame, this, replies, type,
+ priv->child_up, sources, sinks, witness,
+ NULL);
+ if (ret)
+ return ret;
- local->transaction.eager_lock =
- GF_CALLOC (sizeof (*local->transaction.eager_lock),
- priv->child_count,
- gf_afr_mt_int32_t);
+ sources_count = AFR_COUNT(sources, priv->child_count);
+ if (!sources_count)
+ *spb = _gf_true;
- if (!local->transaction.eager_lock)
- goto out;
+ return ret;
+}
- local->transaction.pre_op = GF_CALLOC (sizeof (*local->transaction.pre_op),
- priv->child_count,
- gf_afr_mt_char);
- if (!local->transaction.pre_op)
- goto out;
+int
+afr_is_split_brain(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ uuid_t gfid, gf_boolean_t *d_spb, gf_boolean_t *m_spb)
+{
+ int ret = -1;
+ afr_private_t *priv = NULL;
+ struct afr_reply *replies = NULL;
- local->transaction.fop_subvols = GF_CALLOC (sizeof (*local->transaction.fop_subvols),
- priv->child_count,
- gf_afr_mt_char);
- if (!local->transaction.fop_subvols)
- goto out;
+ priv = this->private;
- local->transaction.failed_subvols = GF_CALLOC (sizeof (*local->transaction.failed_subvols),
- priv->child_count,
- gf_afr_mt_char);
- if (!local->transaction.failed_subvols)
- goto out;
+ replies = alloca0(sizeof(*replies) * priv->child_count);
- local->pending = afr_matrix_create (priv->child_count,
- AFR_NUM_CHANGE_LOGS);
- if (!local->pending)
- goto out;
+ ret = afr_selfheal_unlocked_discover(frame, inode, gfid, replies);
+ if (ret)
+ goto out;
- INIT_LIST_HEAD (&local->transaction.eager_locked);
+ if (!afr_can_decide_split_brain_source_sinks(replies, priv->child_count)) {
+ ret = -EAGAIN;
+ goto out;
+ }
- ret = 0;
+ ret = _afr_is_split_brain(frame, this, replies, AFR_DATA_TRANSACTION,
+ d_spb);
+ if (ret)
+ goto out;
+
+ ret = _afr_is_split_brain(frame, this, replies, AFR_METADATA_TRANSACTION,
+ m_spb);
out:
- return ret;
+ if (replies) {
+ afr_replies_wipe(replies, priv->child_count);
+ replies = NULL;
+ }
+ return ret;
}
-
-void
-afr_set_low_priority (call_frame_t *frame)
+int
+afr_get_split_brain_status_cbk(int ret, call_frame_t *frame, void *opaque)
{
- frame->root->pid = LOW_PRIO_PROC_PID;
+ GF_FREE(opaque);
+ return 0;
}
-
-gf_boolean_t
-afr_have_quorum (char *logname, afr_private_t *priv)
+int
+afr_get_split_brain_status(void *opaque)
{
- unsigned int quorum = 0;
+ gf_boolean_t d_spb = _gf_false;
+ gf_boolean_t m_spb = _gf_false;
+ int ret = -1;
+ int op_errno = 0;
+ int i = 0;
+ char *choices = NULL;
+ char *status = NULL;
+ dict_t *dict = NULL;
+ inode_t *inode = NULL;
+ afr_private_t *priv = NULL;
+ xlator_t **children = NULL;
+ call_frame_t *frame = NULL;
+ xlator_t *this = NULL;
+ loc_t *loc = NULL;
+ afr_spb_status_t *data = NULL;
+
+ data = opaque;
+ frame = data->frame;
+ this = frame->this;
+ loc = data->loc;
+ priv = this->private;
+ children = priv->children;
+
+ inode = afr_inode_find(this, loc->gfid);
+ if (!inode)
+ goto out;
+
+ dict = dict_new();
+ if (!dict) {
+ op_errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+
+ /* Calculation for string length :
+ * (child_count X length of child-name) + SLEN(" Choices :")
+ * child-name consists of :
+ * a) 251 = max characters for volname according to GD_VOLUME_NAME_MAX
+ * b) strlen("-client-00,") assuming 16 replicas
+ */
+ choices = alloca0(priv->child_count * (256 + SLEN("-client-00,")) +
+ SLEN(" Choices:"));
+
+ ret = afr_is_split_brain(frame, this, inode, loc->gfid, &d_spb, &m_spb);
+ if (ret) {
+ op_errno = -ret;
+ if (ret == -EAGAIN) {
+ ret = dict_set_sizen_str_sizen(dict, GF_AFR_SBRAIN_STATUS,
+ SBRAIN_HEAL_NO_GO_MSG);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret,
+ AFR_MSG_DICT_SET_FAILED,
+ "Failed to set GF_AFR_SBRAIN_STATUS in dict");
+ }
+ }
+ ret = -1;
+ goto out;
+ }
+
+ if (d_spb || m_spb) {
+ sprintf(choices, " Choices:");
+ for (i = 0; i < priv->child_count; i++) {
+ strcat(choices, children[i]->name);
+ strcat(choices, ",");
+ }
+ choices[strlen(choices) - 1] = '\0';
- GF_VALIDATE_OR_GOTO(logname,priv,out);
+ ret = gf_asprintf(&status,
+ "data-split-brain:%s "
+ "metadata-split-brain:%s%s",
+ (d_spb) ? "yes" : "no", (m_spb) ? "yes" : "no",
+ choices);
- quorum = priv->quorum_count;
- if (quorum != AFR_QUORUM_AUTO) {
- return (priv->up_count >= (priv->down_count + quorum));
+ if (-1 == ret) {
+ op_errno = ENOMEM;
+ goto out;
}
-
- quorum = priv->child_count / 2 + 1;
- if (priv->up_count >= (priv->down_count + quorum)) {
- return _gf_true;
+ ret = dict_set_dynstr_sizen(dict, GF_AFR_SBRAIN_STATUS, status);
+ if (ret) {
+ op_errno = -ret;
+ ret = -1;
+ goto out;
}
-
- /*
- * Special case for even numbers of nodes: if we have exactly half
- * and that includes the first ("senior-most") node, then that counts
- * as quorum even if it wouldn't otherwise. This supports e.g. N=2
- * while preserving the critical property that there can only be one
- * such group.
- */
- if ((priv->child_count % 2) == 0) {
- quorum = priv->child_count / 2;
- if (priv->up_count >= (priv->down_count + quorum)) {
- if (priv->child_up[0]) {
- return _gf_true;
- }
- }
+ } else {
+ ret = dict_set_sizen_str_sizen(dict, GF_AFR_SBRAIN_STATUS,
+ SFILE_NOT_UNDER_DATA);
+ if (ret) {
+ op_errno = -ret;
+ ret = -1;
+ goto out;
}
+ }
+ ret = 0;
out:
- return _gf_false;
+ AFR_STACK_UNWIND(getxattr, frame, ret, op_errno, dict, NULL);
+ if (dict)
+ dict_unref(dict);
+ if (inode)
+ inode_unref(inode);
+ return ret;
}
-void
-afr_priv_destroy (afr_private_t *priv)
+int32_t
+afr_heal_splitbrain_file(call_frame_t *frame, xlator_t *this, loc_t *loc)
{
- int i = 0;
-
- if (!priv)
- goto out;
- GF_FREE (priv->last_event);
- if (priv->pending_key) {
- for (i = 0; i < priv->child_count; i++)
- GF_FREE (priv->pending_key[i]);
+ int ret = 0;
+ int op_errno = 0;
+ dict_t *dict = NULL;
+ afr_local_t *local = NULL;
+ afr_local_t *heal_local = NULL;
+ call_frame_t *heal_frame = NULL;
+
+ local = frame->local;
+ dict = dict_new();
+ if (!dict) {
+ op_errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+
+ heal_frame = afr_frame_create(this, &op_errno);
+ if (!heal_frame) {
+ ret = -1;
+ goto out;
+ }
+ heal_local = heal_frame->local;
+ heal_frame->local = frame->local;
+ /*Initiate heal with heal_frame with lk-owner set so that inodelk/entrylk
+ * work correctly*/
+ ret = afr_selfheal_do(heal_frame, this, loc->gfid);
+
+ if (ret == 1 || ret == 2) {
+ ret = dict_set_sizen_str_sizen(dict, "sh-fail-msg",
+ SFILE_NOT_IN_SPLIT_BRAIN);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Failed to set sh-fail-msg in dict");
+ ret = 0;
+ goto out;
+ } else {
+ if (local->xdata_rsp) {
+ /* 'sh-fail-msg' has been set in the dict during self-heal.*/
+ dict_copy(local->xdata_rsp, dict);
+ ret = 0;
+ } else if (ret < 0) {
+ op_errno = -ret;
+ ret = -1;
}
- GF_FREE (priv->pending_key);
- GF_FREE (priv->children);
- GF_FREE (priv->child_up);
- LOCK_DESTROY (&priv->lock);
+ }
- GF_FREE (priv);
out:
- return;
+ if (heal_frame) {
+ heal_frame->local = heal_local;
+ AFR_STACK_DESTROY(heal_frame);
+ }
+ if (local->op == GF_FOP_GETXATTR)
+ AFR_STACK_UNWIND(getxattr, frame, ret, op_errno, dict, NULL);
+ else if (local->op == GF_FOP_SETXATTR)
+ AFR_STACK_UNWIND(setxattr, frame, ret, op_errno, NULL);
+ if (dict)
+ dict_unref(dict);
+ return ret;
}
-void
-afr_handle_open_fd_count (call_frame_t *frame, xlator_t *this)
+int
+afr_get_child_index_from_name(xlator_t *this, char *name)
{
- afr_local_t *local = NULL;
- afr_fd_ctx_t *fd_ctx = NULL;
-
- local = frame->local;
-
- if (!local->fd)
- return;
-
- fd_ctx = afr_fd_ctx_get (local->fd, this);
- if (!fd_ctx)
- return;
-
- fd_ctx->open_fd_count = local->open_fd_count;
+ afr_private_t *priv = this->private;
+ int index = -1;
+
+ for (index = 0; index < priv->child_count; index++) {
+ if (!strcmp(priv->children[index]->name, name))
+ goto out;
+ }
+ index = -1;
+out:
+ return index;
}
-int**
-afr_mark_pending_changelog (afr_private_t *priv, unsigned char *pending,
- dict_t *xattr, ia_type_t iat)
+void
+afr_priv_need_heal_set(afr_private_t *priv, gf_boolean_t need_heal)
{
- int i = 0;
- int **changelog = NULL;
- int idx = -1;
- int m_idx = 0;
- int ret = 0;
-
- m_idx = afr_index_for_transaction_type (AFR_METADATA_TRANSACTION);
-
- idx = afr_index_from_ia_type (iat);
-
- changelog = afr_matrix_create (priv->child_count, AFR_NUM_CHANGE_LOGS);
- if (!changelog)
- goto out;
+ LOCK(&priv->lock);
+ {
+ priv->need_heal = need_heal;
+ }
+ UNLOCK(&priv->lock);
+}
- for (i = 0; i < priv->child_count; i++) {
- if (!pending[i])
- continue;
-
- changelog[i][m_idx] = hton32(1);
- if (idx != -1)
- changelog[i][idx] = hton32(1);
- }
- ret = afr_set_pending_dict (priv, xattr, changelog);
- if (ret < 0) {
- afr_matrix_cleanup (changelog, priv->child_count);
- return NULL;
- }
-out:
- return changelog;
+void
+afr_set_need_heal(xlator_t *this, afr_local_t *local)
+{
+ int i = 0;
+ afr_private_t *priv = this->private;
+ gf_boolean_t need_heal = _gf_false;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].valid && local->replies[i].need_heal) {
+ need_heal = _gf_true;
+ break;
+ }
+ }
+ afr_priv_need_heal_set(priv, need_heal);
+ return;
}
gf_boolean_t
-afr_decide_heal_info (afr_private_t *priv, unsigned char *sources, int ret)
+afr_get_need_heal(xlator_t *this)
{
- int sources_count = 0;
-
- if (ret)
- goto out;
+ afr_private_t *priv = this->private;
+ gf_boolean_t need_heal = _gf_true;
+
+ LOCK(&priv->lock);
+ {
+ need_heal = priv->need_heal;
+ }
+ UNLOCK(&priv->lock);
+ return need_heal;
+}
- sources_count = AFR_COUNT (sources, priv->child_count);
- if (sources_count == priv->child_count)
- return _gf_false;
-out:
- return _gf_true;
+int
+afr_get_msg_id(char *op_type)
+{
+ if (!strcmp(op_type, GF_AFR_REPLACE_BRICK))
+ return AFR_MSG_REPLACE_BRICK_STATUS;
+ else if (!strcmp(op_type, GF_AFR_ADD_BRICK))
+ return AFR_MSG_ADD_BRICK_STATUS;
+ return -1;
}
int
-afr_selfheal_locked_metadata_inspect (call_frame_t *frame, xlator_t *this,
- inode_t *inode, gf_boolean_t *msh)
+afr_fav_child_reset_sink_xattrs_cbk(int ret, call_frame_t *heal_frame,
+ void *opaque)
{
- int ret = -1;
- unsigned char *locked_on = NULL;
- unsigned char *sources = NULL;
- unsigned char *sinks = NULL;
- unsigned char *healed_sinks = NULL;
- struct afr_reply *locked_replies = NULL;
+ call_frame_t *txn_frame = NULL;
+ afr_local_t *local = NULL;
+ afr_local_t *heal_local = NULL;
+ xlator_t *this = NULL;
- afr_private_t *priv = this->private;
+ heal_local = heal_frame->local;
+ txn_frame = heal_local->heal_frame;
+ local = txn_frame->local;
+ this = txn_frame->this;
- locked_on = alloca0 (priv->child_count);
- sources = alloca0 (priv->child_count);
- sinks = alloca0 (priv->child_count);
- healed_sinks = alloca0 (priv->child_count);
+ /* Refresh the inode agan and proceed with the transaction.*/
+ afr_inode_refresh(txn_frame, this, local->inode, NULL, local->refreshfn);
- locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count);
+ AFR_STACK_DESTROY(heal_frame);
- ret = afr_selfheal_inodelk (frame, this, inode, this->name,
- LLONG_MAX - 1, 0, locked_on);
- {
- if (ret == 0) {
- /* Not a single lock */
- ret = -afr_final_errno (frame->local, priv);
- if (ret == 0)
- ret = -ENOTCONN;/* all invalid responses */
- goto out;
- }
- ret = __afr_selfheal_metadata_prepare (frame, this, inode,
- locked_on, sources,
- sinks, healed_sinks,
- locked_replies);
- *msh = afr_decide_heal_info (priv, sources, ret);
- }
- afr_selfheal_uninodelk (frame, this, inode, this->name,
- LLONG_MAX - 1, 0, locked_on);
-out:
- if (locked_replies)
- afr_replies_wipe (locked_replies, priv->child_count);
- return ret;
+ return 0;
}
int
-afr_selfheal_locked_data_inspect (call_frame_t *frame, xlator_t *this,
- inode_t *inode, gf_boolean_t *dsh)
-{
- int ret = -1;
- afr_private_t *priv = NULL;
- unsigned char *locked_on = NULL;
- unsigned char *data_lock = NULL;
- unsigned char *sources = NULL;
- unsigned char *sinks = NULL;
- unsigned char *healed_sinks = NULL;
- struct afr_reply *locked_replies = NULL;
-
- priv = this->private;
- locked_on = alloca0 (priv->child_count);
- data_lock = alloca0 (priv->child_count);
- sources = alloca0 (priv->child_count);
- sinks = alloca0 (priv->child_count);
- healed_sinks = alloca0 (priv->child_count);
-
- locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count);
-
- ret = afr_selfheal_tryinodelk (frame, this, inode, priv->sh_domain,
- 0, 0, locked_on);
+afr_fav_child_reset_sink_xattrs(void *opaque)
+{
+ call_frame_t *heal_frame = NULL;
+ call_frame_t *txn_frame = NULL;
+ xlator_t *this = NULL;
+ gf_boolean_t d_spb = _gf_false;
+ gf_boolean_t m_spb = _gf_false;
+ afr_local_t *heal_local = NULL;
+ afr_local_t *txn_local = NULL;
+ afr_private_t *priv = NULL;
+ inode_t *inode = NULL;
+ unsigned char *locked_on = NULL;
+ unsigned char *sources = NULL;
+ unsigned char *sinks = NULL;
+ unsigned char *healed_sinks = NULL;
+ unsigned char *undid_pending = NULL;
+ struct afr_reply *locked_replies = NULL;
+ int ret = 0;
+
+ heal_frame = (call_frame_t *)opaque;
+ heal_local = heal_frame->local;
+ txn_frame = heal_local->heal_frame;
+ txn_local = txn_frame->local;
+ this = txn_frame->this;
+ inode = txn_local->inode;
+ priv = this->private;
+ locked_on = alloca0(priv->child_count);
+ sources = alloca0(priv->child_count);
+ sinks = alloca0(priv->child_count);
+ healed_sinks = alloca0(priv->child_count);
+ undid_pending = alloca0(priv->child_count);
+ locked_replies = alloca0(sizeof(*locked_replies) * priv->child_count);
+
+ ret = _afr_is_split_brain(txn_frame, this, txn_local->replies,
+ AFR_DATA_TRANSACTION, &d_spb);
+
+ ret = _afr_is_split_brain(txn_frame, this, txn_local->replies,
+ AFR_METADATA_TRANSACTION, &m_spb);
+
+ /* Take appropriate locks and reset sink xattrs. */
+ if (d_spb) {
+ ret = afr_selfheal_inodelk(heal_frame, this, inode, this->name, 0, 0,
+ locked_on);
{
- if (ret == 0) {
- ret = -afr_final_errno (frame->local, priv);
- if (ret == 0)
- ret = -ENOTCONN;/* all invalid responses */
- goto out;
- }
- ret = afr_selfheal_inodelk (frame, this, inode, this->name,
- 0, 0, data_lock);
- {
- if (ret == 0) {
- ret = -afr_final_errno (frame->local, priv);
- if (ret == 0)
- ret = -ENOTCONN;
- /* all invalid responses */
- goto unlock;
- }
- ret = __afr_selfheal_data_prepare (frame, this, inode,
- data_lock, sources,
- sinks, healed_sinks,
- locked_replies);
- *dsh = afr_decide_heal_info (priv, sources, ret);
- }
- afr_selfheal_uninodelk (frame, this, inode, this->name, 0, 0,
- data_lock);
- }
-unlock:
- afr_selfheal_uninodelk (frame, this, inode, priv->sh_domain, 0, 0,
- locked_on);
-out:
- if (locked_replies)
- afr_replies_wipe (locked_replies, priv->child_count);
- return ret;
-}
-
-int
-afr_selfheal_locked_entry_inspect (call_frame_t *frame, xlator_t *this,
- inode_t *inode,
- gf_boolean_t *esh)
-{
- int ret = -1;
- int source = -1;
- afr_private_t *priv = NULL;
- unsigned char *locked_on = NULL;
- unsigned char *data_lock = NULL;
- unsigned char *sources = NULL;
- unsigned char *sinks = NULL;
- unsigned char *healed_sinks = NULL;
- struct afr_reply *locked_replies = NULL;
-
- priv = this->private;
- locked_on = alloca0 (priv->child_count);
- data_lock = alloca0 (priv->child_count);
- sources = alloca0 (priv->child_count);
- sinks = alloca0 (priv->child_count);
- healed_sinks = alloca0 (priv->child_count);
-
- locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count);
-
- ret = afr_selfheal_tryentrylk (frame, this, inode, priv->sh_domain,
- NULL, locked_on);
+ if (ret < priv->child_count)
+ goto data_unlock;
+ ret = __afr_selfheal_data_prepare(
+ heal_frame, this, inode, locked_on, sources, sinks,
+ healed_sinks, undid_pending, locked_replies, NULL);
+ }
+ data_unlock:
+ afr_selfheal_uninodelk(heal_frame, this, inode, this->name, 0, 0,
+ locked_on);
+ }
+
+ if (m_spb) {
+ memset(locked_on, 0, sizeof(*locked_on) * priv->child_count);
+ memset(undid_pending, 0, sizeof(*undid_pending) * priv->child_count);
+ ret = afr_selfheal_inodelk(heal_frame, this, inode, this->name,
+ LLONG_MAX - 1, 0, locked_on);
{
- if (ret == 0) {
- ret = -afr_final_errno (frame->local, priv);
- if (ret == 0)
- ret = -ENOTCONN;/* all invalid responses */
- goto out;
- }
-
- ret = afr_selfheal_entrylk (frame, this, inode, this->name,
- NULL, data_lock);
- {
- if (ret == 0) {
- ret = -afr_final_errno (frame->local, priv);
- if (ret == 0)
- ret = -ENOTCONN;
- /* all invalid responses */
- goto unlock;
- }
- ret = __afr_selfheal_entry_prepare (frame, this, inode,
- data_lock, sources,
- sinks, healed_sinks,
- locked_replies,
- &source);
- if ((ret == 0) && source < 0)
- ret = -EIO;
- *esh = afr_decide_heal_info (priv, sources, ret);
- }
- afr_selfheal_unentrylk (frame, this, inode, this->name, NULL,
- data_lock);
- }
-unlock:
- afr_selfheal_unentrylk (frame, this, inode, priv->sh_domain, NULL,
- locked_on);
-out:
- if (locked_replies)
- afr_replies_wipe (locked_replies, priv->child_count);
- return ret;
+ if (ret < priv->child_count)
+ goto mdata_unlock;
+ ret = __afr_selfheal_metadata_prepare(
+ heal_frame, this, inode, locked_on, sources, sinks,
+ healed_sinks, undid_pending, locked_replies, NULL);
+ }
+ mdata_unlock:
+ afr_selfheal_uninodelk(heal_frame, this, inode, this->name,
+ LLONG_MAX - 1, 0, locked_on);
+ }
+
+ return ret;
}
+/*
+ * Concatenates the xattrs in local->replies separated by a delimiter.
+ */
int
-afr_selfheal_locked_inspect (call_frame_t *frame, xlator_t *this, uuid_t gfid,
- inode_t **inode,
- gf_boolean_t *entry_selfheal,
- gf_boolean_t *data_selfheal,
- gf_boolean_t *metadata_selfheal)
-
+afr_serialize_xattrs_with_delimiter(call_frame_t *frame, xlator_t *this,
+ char *buf, const char *default_str,
+ int32_t *serz_len, char delimiter)
{
- int ret = -1;
- gf_boolean_t dsh = _gf_false;
- gf_boolean_t msh = _gf_false;
- gf_boolean_t esh = _gf_false;
-
- ret = afr_selfheal_unlocked_inspect (frame, this, gfid, inode,
- &dsh, &msh, &esh);
- if (ret)
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ char *xattr = NULL;
+ int i = 0;
+ int len = 0;
+ int keylen = 0;
+ size_t str_len = 0;
+ int ret = -1;
+
+ priv = this->private;
+ local = frame->local;
+
+ keylen = strlen(local->cont.getxattr.name);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid || local->replies[i].op_ret) {
+ str_len = strlen(default_str);
+ buf = strncat(buf, default_str, str_len);
+ len += str_len;
+ buf[len++] = delimiter;
+ buf[len] = '\0';
+ } else {
+ ret = dict_get_strn(local->replies[i].xattr,
+ local->cont.getxattr.name, keylen, &xattr);
+ if (ret) {
+ gf_msg("TEST", GF_LOG_ERROR, -ret, AFR_MSG_DICT_GET_FAILED,
+ "Failed to get the node_uuid of brick "
+ "%d",
+ i);
goto out;
+ }
+ str_len = strlen(xattr);
+ buf = strncat(buf, xattr, str_len);
+ len += str_len;
+ buf[len++] = delimiter;
+ buf[len] = '\0';
+ }
+ }
+ buf[--len] = '\0'; /*remove the last delimiter*/
+ if (serz_len)
+ *serz_len = ++len;
+ ret = 0;
- /* For every heal type hold locks and check if it indeed needs heal */
+out:
+ return ret;
+}
- if (msh) {
- ret = afr_selfheal_locked_metadata_inspect (frame, this,
- *inode, &msh);
- if (ret == -EIO)
- goto out;
- }
+uint64_t
+afr_write_subvol_get(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ uint64_t write_subvol = 0;
- if (dsh) {
- ret = afr_selfheal_locked_data_inspect (frame, this, *inode,
- &dsh);
- if (ret == -EIO || (ret == -EAGAIN))
- goto out;
- }
+ local = frame->local;
+ LOCK(&local->inode->lock);
+ write_subvol = local->inode_ctx->write_subvol;
+ UNLOCK(&local->inode->lock);
- if (esh) {
- ret = afr_selfheal_locked_entry_inspect (frame, this, *inode,
- &esh);
- }
+ return write_subvol;
+}
-out:
- *data_selfheal = dsh;
- *entry_selfheal = esh;
- *metadata_selfheal = msh;
- return ret;
+int
+afr_write_subvol_set(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ unsigned char *data_accused = NULL;
+ unsigned char *metadata_accused = NULL;
+ unsigned char *data_readable = NULL;
+ unsigned char *metadata_readable = NULL;
+ uint16_t datamap = 0;
+ uint16_t metadatamap = 0;
+ uint64_t val = 0;
+ int event = 0;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+ data_accused = alloca0(priv->child_count);
+ metadata_accused = alloca0(priv->child_count);
+ data_readable = alloca0(priv->child_count);
+ metadata_readable = alloca0(priv->child_count);
+ event = local->event_generation;
+
+ afr_readables_fill(frame, this, local->inode, data_accused,
+ metadata_accused, data_readable, metadata_readable,
+ NULL);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (data_readable[i])
+ datamap |= (1 << i);
+ if (metadata_readable[i])
+ metadatamap |= (1 << i);
+ }
+
+ val = ((uint64_t)metadatamap) | (((uint64_t)datamap) << 16) |
+ (((uint64_t)event) << 32);
+
+ LOCK(&local->inode->lock);
+ {
+ if (local->inode_ctx->write_subvol == 0 &&
+ local->transaction.type == AFR_DATA_TRANSACTION) {
+ local->inode_ctx->write_subvol = val;
+ }
+ }
+ UNLOCK(&local->inode->lock);
+
+ return 0;
}
-dict_t*
-afr_set_heal_info (char *status)
+int
+afr_write_subvol_reset(call_frame_t *frame, xlator_t *this)
{
- dict_t *dict = NULL;
- int ret = -1;
+ afr_local_t *local = NULL;
- dict = dict_new ();
- if (!dict) {
- ret = -ENOMEM;
- goto out;
- }
+ local = frame->local;
+ LOCK(&local->inode->lock);
+ {
+ GF_ASSERT(local->inode_ctx->lock_count > 0);
+ local->inode_ctx->lock_count--;
- if (!strcmp (status, "heal")) {
- ret = dict_set_str (dict, "heal-info", "heal");
- if (ret)
- gf_log ("", GF_LOG_WARNING,
- "Failed to set heal-info key to"
- "heal");
- } else if (!strcmp (status, "split-brain")) {
- ret = dict_set_str (dict, "heal-info", "split-brain");
- if (ret)
- gf_log ("", GF_LOG_WARNING,
- "Failed to set heal-info key to"
- "split-brain");
- } else if (!strcmp (status, "possibly-healing")) {
- ret = dict_set_str (dict, "heal-info", "possibly-healing");
- if (ret)
- gf_log ("", GF_LOG_WARNING,
- "Failed to set heal-info key to"
- "possibly-healing");
- }
-out:
- return dict;
+ if (!local->inode_ctx->lock_count)
+ local->inode_ctx->write_subvol = 0;
+ }
+ UNLOCK(&local->inode->lock);
+
+ return 0;
}
int
-afr_get_heal_info (call_frame_t *frame, xlator_t *this, loc_t *loc,
- dict_t *xdata)
+afr_set_inode_local(xlator_t *this, afr_local_t *local, inode_t *inode)
{
- gf_boolean_t data_selfheal = _gf_false;
- gf_boolean_t metadata_selfheal = _gf_false;
- gf_boolean_t entry_selfheal = _gf_false;
- dict_t *dict = NULL;
- int ret = -1;
- int op_errno = 0;
- inode_t *inode = NULL;
+ int ret = 0;
+
+ local->inode = inode_ref(inode);
+ LOCK(&local->inode->lock);
+ {
+ ret = __afr_inode_ctx_get(this, local->inode, &local->inode_ctx);
+ }
+ UNLOCK(&local->inode->lock);
+ if (ret < 0) {
+ gf_msg_callingfn(
+ this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_INODE_CTX_GET_FAILED,
+ "Error getting inode ctx %s", uuid_utoa(local->inode->gfid));
+ }
+ return ret;
+}
- ret = afr_selfheal_locked_inspect (frame, this, loc->gfid, &inode,
- &entry_selfheal,
- &data_selfheal, &metadata_selfheal);
+gf_boolean_t
+afr_ta_is_fop_called_from_synctask(xlator_t *this)
+{
+ struct synctask *task = NULL;
+ gf_lkowner_t tmp_owner = {
+ 0,
+ };
- if (ret == -ENOMEM) {
- op_errno = -ret;
- ret = -1;
- goto out;
- }
+ task = synctask_get();
+ if (!task)
+ return _gf_false;
- if (ret == -EIO) {
- dict = afr_set_heal_info ("split-brain");
- } else if (ret == -EAGAIN) {
- dict = afr_set_heal_info ("possibly-healing");
- } else if (ret >= 0) {
- /* value of ret = source index
- * so ret >= 0 and at least one of the 3 booleans set to
- * true means a source is identified; heal is required.
- */
- if (!data_selfheal && !entry_selfheal &&
- !metadata_selfheal) {
- dict = afr_set_heal_info ("no-heal");
- } else {
- dict = afr_set_heal_info ("heal");
- }
- } else if (ret < 0) {
- /* Apart from above checked -ve ret values, there are
- * other possible ret values like ENOTCONN
- * (returned when number of valid replies received are
- * less than 2)
- * in which case heal is required when one of the
- * selfheal booleans is set.
- */
- if (data_selfheal || entry_selfheal ||
- metadata_selfheal) {
- dict = afr_set_heal_info ("heal");
- }
- }
- ret = 0;
+ set_lk_owner_from_ptr(&tmp_owner, (void *)this);
+
+ if (!is_same_lkowner(&tmp_owner, &task->frame->root->lk_owner))
+ return _gf_false;
+ return _gf_true;
+}
+
+int
+afr_ta_post_op_lock(xlator_t *this, loc_t *loc)
+{
+ int ret = 0;
+ uuid_t gfid = {
+ 0,
+ };
+ afr_private_t *priv = this->private;
+ gf_boolean_t locked = _gf_false;
+ struct gf_flock flock1 = {
+ 0,
+ };
+ struct gf_flock flock2 = {
+ 0,
+ };
+ int32_t cmd = 0;
+
+ /* Clients must take AFR_TA_DOM_NOTIFY lock only when the previous lock
+ * has been released in afr_notify due to upcall notification from shd.
+ */
+ GF_ASSERT(priv->ta_notify_dom_lock_offset == 0);
+
+ if (!priv->shd.iamshd)
+ GF_ASSERT(afr_ta_is_fop_called_from_synctask(this));
+ flock1.l_type = F_WRLCK;
+
+ while (!locked) {
+ if (priv->shd.iamshd) {
+ cmd = F_SETLKW;
+ flock1.l_start = 0;
+ flock1.l_len = 0;
+ } else {
+ cmd = F_SETLK;
+ gf_uuid_generate(gfid);
+ flock1.l_start = gfid_to_ino(gfid);
+ if (flock1.l_start < 0)
+ flock1.l_start = -flock1.l_start;
+ flock1.l_len = 1;
+ }
+ ret = syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX],
+ AFR_TA_DOM_NOTIFY, loc, cmd, &flock1, NULL, NULL);
+ if (!ret) {
+ locked = _gf_true;
+ priv->ta_notify_dom_lock_offset = flock1.l_start;
+ } else if (ret == -EAGAIN) {
+ continue;
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Failed to get "
+ "AFR_TA_DOM_NOTIFY lock on %s.",
+ loc->name);
+ goto out;
+ }
+ }
+
+ flock2.l_type = F_WRLCK;
+ flock2.l_start = 0;
+ flock2.l_len = 0;
+ ret = syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX],
+ AFR_TA_DOM_MODIFY, loc, F_SETLKW, &flock2, NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Failed to get AFR_TA_DOM_MODIFY lock on %s.", loc->name);
+ flock1.l_type = F_UNLCK;
+ ret = syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX],
+ AFR_TA_DOM_NOTIFY, loc, F_SETLK, &flock1, NULL,
+ NULL);
+ }
out:
- AFR_STACK_UNWIND (getxattr, frame, ret, op_errno, dict, NULL);
- if (dict)
- dict_unref (dict);
- if (inode) {
- inode_forget (inode, 1);
- inode_unref (inode);
- }
- return ret;
+ return ret;
}
int
-afr_set_split_brain_status (call_frame_t *frame, xlator_t *this,
- struct afr_reply *replies,
- afr_transaction_type type,
- gf_boolean_t *spb)
-{
- afr_private_t *priv = NULL;
- uint64_t *witness = NULL;
- unsigned char *sources = NULL;
- unsigned char *sinks = NULL;
- int sources_count = 0;
- int ret = 0;
-
- priv = this->private;
-
- sources = alloca0 (priv->child_count);
- sinks = alloca0 (priv->child_count);
- witness = alloca0(priv->child_count * sizeof (*witness));
-
- ret = afr_selfheal_find_direction (frame, this, replies,
- type, priv->child_up, sources,
- sinks, witness);
- if (ret)
- return ret;
+afr_ta_post_op_unlock(xlator_t *this, loc_t *loc)
+{
+ afr_private_t *priv = this->private;
+ struct gf_flock flock = {
+ 0,
+ };
+ int ret = 0;
+
+ if (!priv->shd.iamshd)
+ GF_ASSERT(afr_ta_is_fop_called_from_synctask(this));
+ flock.l_type = F_UNLCK;
+ flock.l_start = 0;
+ flock.l_len = 0;
+
+ ret = syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX],
+ AFR_TA_DOM_MODIFY, loc, F_SETLK, &flock, NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Failed to unlock AFR_TA_DOM_MODIFY lock.");
+ goto out;
+ }
+
+ if (!priv->shd.iamshd)
+ /* Mounts (clients) will not release the AFR_TA_DOM_NOTIFY lock
+ * in post-op as they use it as a notification mechanism. When
+ * shd sends a lock request on TA during heal, the clients will
+ * receive a lock-contention upcall notification upon which they
+ * will release the AFR_TA_DOM_NOTIFY lock after completing the
+ * in flight I/O.*/
+ goto out;
+
+ ret = syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX],
+ AFR_TA_DOM_NOTIFY, loc, F_SETLK, &flock, NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Failed to unlock AFR_TA_DOM_NOTIFY lock.");
+ }
+out:
+ return ret;
+}
- sources_count = AFR_COUNT (sources, priv->child_count);
- if (!sources_count)
- *spb = _gf_true;
+call_frame_t *
+afr_ta_frame_create(xlator_t *this)
+{
+ call_frame_t *frame = NULL;
+ void *lk_owner = NULL;
- return ret;
+ frame = create_frame(this, this->ctx->pool);
+ if (!frame)
+ return NULL;
+ lk_owner = (void *)this;
+ afr_set_lk_owner(frame, this, lk_owner);
+ return frame;
}
-int
-afr_get_split_brain_status (call_frame_t *frame, xlator_t *this, loc_t *loc)
-{
- gf_boolean_t d_spb = _gf_false;
- gf_boolean_t m_spb = _gf_false;
- int ret = -1;
- int op_errno = 0;
- int i = 0;
- char *choices = NULL;
- char *status = NULL;
- dict_t *dict = NULL;
- struct afr_reply *replies = NULL;
- inode_t *inode = NULL;
- afr_private_t *priv = NULL;
- xlator_t **children = NULL;
-
- priv = this->private;
- children = priv->children;
-
- inode = afr_inode_find (this, loc->gfid);
- if (!inode)
- goto out;
- replies = alloca0 (sizeof (*replies) * priv->child_count);
+gf_boolean_t
+afr_ta_has_quorum(afr_private_t *priv, afr_local_t *local)
+{
+ int data_count = 0;
- /* Calculation for string length :
- * (child_count X length of child-name) + strlen (" Choices :")
- * child-name consists of :
- * a) 256 = max characters for volname according to GD_VOLUME_NAME_MAX
- * b) strlen ("-client-00,") assuming 16 replicas
- */
- choices = alloca0 (priv->child_count * (256 + strlen ("-client-00,")) +
- strlen (" Choices:"));
- ret = afr_selfheal_unlocked_discover (frame, inode, loc->gfid, replies);
- if (ret) {
- op_errno = -ret;
- ret = -1;
- goto out;
- }
+ data_count = AFR_COUNT(local->child_up, priv->child_count);
+ if (data_count == 2) {
+ return _gf_true;
+ } else if (data_count == 1 && local->ta_child_up) {
+ return _gf_true;
+ }
- ret = afr_set_split_brain_status (frame, this, replies,
- AFR_DATA_TRANSACTION, &d_spb);
- if (ret) {
- op_errno = -ret;
- ret = -1;
- goto out;
- }
+ return _gf_false;
+}
- ret = afr_set_split_brain_status (frame, this, replies,
- AFR_METADATA_TRANSACTION, &m_spb);
- if (ret) {
- op_errno = -ret;
- ret = -1;
- goto out;
- }
+static gf_boolean_t
+afr_is_add_replica_mount_lookup_on_root(call_frame_t *frame)
+{
+ afr_local_t *local = NULL;
- dict = dict_new ();
- if (!dict) {
- op_errno = ENOMEM;
- ret = -1;
- goto out;
- }
+ if (frame->root->pid != GF_CLIENT_PID_ADD_REPLICA_MOUNT)
+ return _gf_false;
- if (d_spb || m_spb) {
- sprintf (choices, " Choices:");
- for (i = 0; i < priv->child_count; i++) {
- strcat (choices, children[i]->name);
- strcat (choices, ",");
- }
- choices[strlen (choices) - 1] = '\0';
+ local = frame->local;
- ret = gf_asprintf (&status, "data-split-brain:%s "
- "metadata-split-brain:%s%s",
- (d_spb) ? "yes" : "no",
- (m_spb) ? "yes" : "no", choices);
+ if (local->op != GF_FOP_LOOKUP)
+ /* TODO:If the replica count is being increased on a plain distribute
+ * volume that was never mounted, we need to allow setxattr on '/' with
+ * GF_CLIENT_PID_NO_ROOT_SQUASH to accomodate for DHT layout setting */
+ return _gf_false;
- if (-1 == ret) {
- op_errno = ENOMEM;
- goto out;
- }
- ret = dict_set_dynstr (dict, GF_AFR_SBRAIN_STATUS, status);
- if (ret)
- goto out;
- } else {
- ret = dict_set_str (dict, GF_AFR_SBRAIN_STATUS,
- "The file is not under data or"
- " metadata split-brain");
- if (ret)
- goto out;
- }
+ if (local->inode == NULL)
+ return _gf_false;
- ret = 0;
-out:
- AFR_STACK_UNWIND (getxattr, frame, ret, op_errno, dict, NULL);
- if (dict)
- dict_unref (dict);
- if (replies)
- afr_replies_wipe (replies, priv->child_count);
- if (inode)
- inode_unref (inode);
- return ret;
+ if (!__is_root_gfid(local->inode->gfid))
+ return _gf_false;
+
+ return _gf_true;
}
-int32_t
-afr_heal_splitbrain_file(call_frame_t *frame, xlator_t *this, loc_t *loc)
+gf_boolean_t
+afr_lookup_has_quorum(call_frame_t *frame, const unsigned int up_children_count)
{
- int ret = 0;
- int op_errno = 0;
- dict_t *dict = NULL;
- afr_local_t *local = NULL;
+ if (frame && (up_children_count > 0) &&
+ afr_is_add_replica_mount_lookup_on_root(frame))
+ return _gf_true;
- local = frame->local;
- dict = dict_new ();
- if (!dict) {
- op_errno = ENOMEM;
- ret = -1;
- goto out;
- }
+ return _gf_false;
+}
- ret = afr_selfheal_do (frame, this, loc->gfid);
+void
+afr_handle_replies_quorum(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = frame->local;
+ afr_private_t *priv = this->private;
+ unsigned char *success_replies = NULL;
- if (ret == 1 || ret == 2) {
- ret = dict_set_str (dict, "sh-fail-msg",
- "File not in split-brain");
- if (ret)
- gf_log (this->name, GF_LOG_WARNING,
- "Failed to set sh-fail-msg in dict");
- ret = 0;
- goto out;
- } else {
- if (local->xdata_rsp) {
- /* 'sh-fail-msg' has been set in the dict during self-heal.*/
- dict_copy (local->xdata_rsp, dict);
- ret = 0;
- } else if (ret < 0) {
- op_errno = -ret;
- ret = -1;
- }
- }
+ success_replies = alloca0(priv->child_count);
+ afr_fill_success_replies(local, priv, success_replies);
-out:
- if (local->op == GF_FOP_GETXATTR)
- AFR_STACK_UNWIND (getxattr, frame, ret, op_errno, dict, NULL);
- else if (local->op == GF_FOP_SETXATTR)
- AFR_STACK_UNWIND (setxattr, frame, ret, op_errno, NULL);
- if (dict)
- dict_unref(dict);
- return ret;
+ if (priv->quorum_count && !afr_has_quorum(success_replies, this, NULL)) {
+ local->op_errno = afr_final_errno(local, priv);
+ if (!local->op_errno)
+ local->op_errno = afr_quorum_errno(priv);
+ local->op_ret = -1;
+ }
}
-int
-afr_get_child_index_from_name (xlator_t *this, char *name)
+gf_boolean_t
+afr_ta_dict_contains_pending_xattr(dict_t *dict, afr_private_t *priv, int child)
{
- afr_private_t *priv = this->private;
- int index = -1;
-
- for (index = 0; index < priv->child_count; index++) {
- if (!strcmp (priv->children[index]->name, name))
- goto out;
+ int *pending = NULL;
+ int ret = 0;
+ int i = 0;
+
+ ret = dict_get_ptr(dict, priv->pending_key[child], (void *)&pending);
+ if (ret == 0) {
+ for (i = 0; i < AFR_NUM_CHANGE_LOGS; i++) {
+ /* Not doing a ntoh32(pending) as we just want to check
+ * if it is non-zero or not. */
+ if (pending[i]) {
+ return _gf_true;
+ }
}
- index = -1;
-out:
- return index;
+ }
+
+ return _gf_false;
}
diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c
index 984ed9c6095..f8bf8340dab 100644
--- a/xlators/cluster/afr/src/afr-dir-read.c
+++ b/xlators/cluster/afr/src/afr-dir-read.c
@@ -8,334 +8,339 @@
cases as published by the Free Software Foundation.
*/
-
#include <libgen.h>
#include <unistd.h>
-#include <fnmatch.h>
#include <sys/time.h>
#include <stdlib.h>
#include <signal.h>
#include <string.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "list.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
-#include "checksum.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/list.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/compat.h>
#include "afr.h"
#include "afr-transaction.h"
-
int32_t
-afr_opendir_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- fd_t *fd, dict_t *xdata)
+afr_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- afr_local_t *local = NULL;
- int call_count = -1;
- int32_t child_index = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
-
- local = frame->local;
- fd_ctx = local->fd_ctx;
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- fd_ctx->opened_on[child_index] = AFR_FD_NOT_OPENED;
- } else {
- local->op_ret = op_ret;
- fd_ctx->opened_on[child_index] = AFR_FD_OPENED;
- if (!local->xdata_rsp && xdata)
- local->xdata_rsp = dict_ref (xdata);
- }
+ afr_local_t *local = NULL;
+ int call_count = -1;
+ int32_t child_index = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
+
+ local = frame->local;
+ fd_ctx = local->fd_ctx;
+ child_index = (long)cookie;
+
+ local->replies[child_index].valid = 1;
+ local->replies[child_index].op_ret = op_ret;
+ local->replies[child_index].op_errno = op_errno;
+
+ LOCK(&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ fd_ctx->opened_on[child_index] = AFR_FD_NOT_OPENED;
+ } else {
+ local->op_ret = op_ret;
+ fd_ctx->opened_on[child_index] = AFR_FD_OPENED;
+ if (!local->xdata_rsp && xdata)
+ local->xdata_rsp = dict_ref(xdata);
}
- UNLOCK (&frame->lock);
+ call_count = --local->call_count;
+ }
+ UNLOCK(&frame->lock);
- call_count = afr_frame_return (frame);
+ if (call_count == 0) {
+ afr_handle_replies_quorum(frame, this);
+ AFR_STACK_UNWIND(opendir, frame, local->op_ret, local->op_errno,
+ local->fd, NULL);
+ }
- if (call_count == 0)
- AFR_STACK_UNWIND (opendir, frame, local->op_ret,
- local->op_errno, local->fd, NULL);
- return 0;
+ return 0;
}
-
int
-afr_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
+afr_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- int i = 0;
- int call_count = -1;
- int32_t op_errno = ENOMEM;
- afr_fd_ctx_t *fd_ctx = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int i = 0;
+ int call_count = -1;
+ int32_t op_errno = ENOMEM;
+ afr_fd_ctx_t *fd_ctx = NULL;
+
+ priv = this->private;
- priv = this->private;
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
- local = AFR_FRAME_INIT (frame, op_errno);
- if (!local)
- goto out;
+ local->op = GF_FOP_OPENDIR;
- fd_ctx = afr_fd_ctx_get (fd, this);
- if (!fd_ctx)
- goto out;
+ if (priv->quorum_count && !afr_has_quorum(local->child_up, this, NULL)) {
+ op_errno = afr_quorum_errno(priv);
+ goto out;
+ }
- loc_copy (&local->loc, loc);
+ if (!afr_is_consistent_io_possible(local, priv, &op_errno))
+ goto out;
- local->fd = fd_ref (fd);
- local->fd_ctx = fd_ctx;
+ fd_ctx = afr_fd_ctx_get(fd, this);
+ if (!fd_ctx)
+ goto out;
- call_count = local->call_count;
+ loc_copy(&local->loc, loc);
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_opendir_cbk,
- (void*) (long) i,
- priv->children[i],
- priv->children[i]->fops->opendir,
- loc, fd, NULL);
+ local->fd = fd_ref(fd);
+ local->fd_ctx = fd_ctx;
- if (!--call_count)
- break;
- }
+ call_count = local->call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE(frame, afr_opendir_cbk, (void *)(long)i,
+ priv->children[i],
+ priv->children[i]->fops->opendir, loc, fd, NULL);
+
+ if (!--call_count)
+ break;
}
+ }
- return 0;
+ return 0;
out:
- AFR_STACK_UNWIND (opendir, frame, -1, op_errno, fd, NULL);
- return 0;
+ AFR_STACK_UNWIND(opendir, frame, -1, op_errno, fd, NULL);
+ return 0;
}
static int
-afr_validate_read_subvol (inode_t *inode, xlator_t *this, int par_read_subvol)
+afr_validate_read_subvol(inode_t *inode, xlator_t *this, int par_read_subvol)
{
- int gen = 0;
- int entry_read_subvol = 0;
- unsigned char *data_readable = NULL;
- unsigned char *metadata_readable = NULL;
- afr_private_t *priv = NULL;
-
- priv = this->private;
- data_readable = alloca0 (priv->child_count);
- metadata_readable = alloca0 (priv->child_count);
-
- afr_inode_read_subvol_get (inode, this, data_readable,
- metadata_readable, &gen);
-
- if (gen != priv->event_generation ||
- !data_readable[par_read_subvol] ||
- !metadata_readable[par_read_subvol])
- return -1;
-
- /* Once the control reaches the following statement, it means that the
- * parent's read subvol is perfectly readable. So calling
- * either afr_data_subvol_get() or afr_metadata_subvol_get() would
- * yield the same result. Hence, choosing afr_data_subvol_get() below.
- */
-
- if (!priv->consistent_metadata)
- return 0;
-
- entry_read_subvol = afr_data_subvol_get (inode, this, 0, 0);
- if (entry_read_subvol != par_read_subvol)
- return -1;
-
+ int gen = 0;
+ int entry_read_subvol = 0;
+ unsigned char *data_readable = NULL;
+ unsigned char *metadata_readable = NULL;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ data_readable = alloca0(priv->child_count);
+ metadata_readable = alloca0(priv->child_count);
+
+ afr_inode_read_subvol_get(inode, this, data_readable, metadata_readable,
+ &gen);
+
+ if (gen != priv->event_generation || !data_readable[par_read_subvol] ||
+ !metadata_readable[par_read_subvol])
+ return -1;
+
+ /* Once the control reaches the following statement, it means that the
+ * parent's read subvol is perfectly readable. So calling
+ * either afr_data_subvol_get() or afr_metadata_subvol_get() would
+ * yield the same result. Hence, choosing afr_data_subvol_get() below.
+ */
+
+ if (!priv->consistent_metadata)
return 0;
+ /* For an inode fetched through readdirp which is yet to be linked,
+ * inode ctx would not be initialised (yet). So this function returns
+ * -1 above due to gen being 0, which is why it is OK to pass NULL for
+ * read_subvol_args here.
+ */
+ entry_read_subvol = afr_data_subvol_get(inode, this, NULL, NULL, NULL,
+ NULL);
+ if (entry_read_subvol != par_read_subvol)
+ return -1;
+
+ return 0;
}
static void
-afr_readdir_transform_entries (gf_dirent_t *subvol_entries, int subvol,
- gf_dirent_t *entries, fd_t *fd)
+afr_readdir_transform_entries(call_frame_t *frame, gf_dirent_t *subvol_entries,
+ int subvol, gf_dirent_t *entries, fd_t *fd)
{
- int ret = -1;
- gf_dirent_t *entry = NULL;
- gf_dirent_t *tmp = NULL;
- xlator_t *this = NULL;
-
- this = THIS;
-
- list_for_each_entry_safe (entry, tmp, &subvol_entries->list, list) {
- if (__is_root_gfid (fd->inode->gfid) &&
- !strcmp (entry->d_name, GF_REPLICATE_TRASH_DIR)) {
- continue;
- }
-
- list_del_init (&entry->list);
- list_add_tail (&entry->list, &entries->list);
-
- if (entry->inode) {
- ret = afr_validate_read_subvol (entry->inode, this,
- subvol);
- if (ret == -1) {
- inode_unref (entry->inode);
- entry->inode = NULL;
- continue;
- }
- }
+ int ret = -1;
+ gf_dirent_t *entry = NULL;
+ gf_dirent_t *tmp = NULL;
+ xlator_t *this = NULL;
+ afr_private_t *priv = NULL;
+ gf_boolean_t need_heal = _gf_false;
+ gf_boolean_t validate_subvol = _gf_false;
+
+ this = THIS;
+ priv = this->private;
+
+ need_heal = afr_get_need_heal(this);
+ validate_subvol = need_heal | priv->consistent_metadata;
+
+ list_for_each_entry_safe(entry, tmp, &subvol_entries->list, list)
+ {
+ if (afr_is_private_directory(priv, fd->inode->gfid, entry->d_name,
+ frame->root->pid)) {
+ continue;
}
-}
+ list_del_init(&entry->list);
+ list_add_tail(&entry->list, &entries->list);
+
+ if (!validate_subvol)
+ continue;
+
+ if (entry->inode) {
+ ret = afr_validate_read_subvol(entry->inode, this, subvol);
+ if (ret == -1) {
+ inode_unref(entry->inode);
+ entry->inode = NULL;
+ continue;
+ }
+ }
+ }
+}
int32_t
-afr_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *subvol_entries,
- dict_t *xdata)
+afr_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *subvol_entries,
+ dict_t *xdata)
{
- afr_local_t *local = NULL;
- gf_dirent_t entries;
+ afr_local_t *local = NULL;
+ gf_dirent_t entries;
- INIT_LIST_HEAD (&entries.list);
+ INIT_LIST_HEAD(&entries.list);
- local = frame->local;
+ local = frame->local;
- if (op_ret < 0 && !local->cont.readdir.offset) {
- /* failover only if this was first readdir, detected
- by offset == 0 */
- local->op_ret = op_ret;
- local->op_errno = op_errno;
+ if (op_ret < 0 && !local->cont.readdir.offset) {
+ /* failover only if this was first readdir, detected
+ by offset == 0 */
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
- afr_read_txn_continue (frame, this, (long) cookie);
- return 0;
- }
+ afr_read_txn_continue(frame, this, (long)cookie);
+ return 0;
+ }
- if (op_ret >= 0)
- afr_readdir_transform_entries (subvol_entries, (long) cookie,
- &entries, local->fd);
+ if (op_ret >= 0)
+ afr_readdir_transform_entries(frame, subvol_entries, (long)cookie,
+ &entries, local->fd);
- AFR_STACK_UNWIND (readdir, frame, op_ret, op_errno, &entries, xdata);
+ AFR_STACK_UNWIND(readdir, frame, op_ret, op_errno, &entries, xdata);
- gf_dirent_free (&entries);
+ gf_dirent_free(&entries);
- return 0;
+ return 0;
}
-
int
-afr_readdir_wind (call_frame_t *frame, xlator_t *this, int subvol)
+afr_readdir_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- afr_fd_ctx_t *fd_ctx = NULL;
-
- priv = this->private;
- local = frame->local;
- fd_ctx = afr_fd_ctx_get (local->fd, this);
-
- if (subvol == -1) {
- AFR_STACK_UNWIND (readdir, frame, local->op_ret,
- local->op_errno, 0, 0);
- return 0;
- }
-
- fd_ctx->readdir_subvol = subvol;
-
- if (local->op == GF_FOP_READDIR)
- STACK_WIND_COOKIE (frame, afr_readdir_cbk,
- (void *) (long) subvol,
- priv->children[subvol],
- priv->children[subvol]->fops->readdir,
- local->fd, local->cont.readdir.size,
- local->cont.readdir.offset,
- local->xdata_req);
- else
- STACK_WIND_COOKIE (frame, afr_readdir_cbk,
- (void *) (long) subvol,
- priv->children[subvol],
- priv->children[subvol]->fops->readdirp,
- local->fd, local->cont.readdir.size,
- local->cont.readdir.offset,
- local->xdata_req);
- return 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+
+ priv = this->private;
+ local = frame->local;
+ fd_ctx = afr_fd_ctx_get(local->fd, this);
+ if (!fd_ctx) {
+ local->op_errno = EINVAL;
+ local->op_ret = -1;
+ }
+
+ if (subvol == -1 || !fd_ctx) {
+ AFR_STACK_UNWIND(readdir, frame, local->op_ret, local->op_errno, 0, 0);
+ return 0;
+ }
+
+ fd_ctx->readdir_subvol = subvol;
+
+ if (local->op == GF_FOP_READDIR)
+ STACK_WIND_COOKIE(frame, afr_readdir_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->readdir, local->fd,
+ local->cont.readdir.size, local->cont.readdir.offset,
+ local->xdata_req);
+ else
+ STACK_WIND_COOKIE(frame, afr_readdir_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->readdirp, local->fd,
+ local->cont.readdir.size, local->cont.readdir.offset,
+ local->xdata_req);
+ return 0;
}
-
int
-afr_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset, int whichop, dict_t *dict)
+afr_do_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, int whichop, dict_t *dict)
{
- afr_local_t *local = NULL;
- int32_t op_errno = 0;
- int subvol = -1;
- afr_fd_ctx_t *fd_ctx = NULL;
-
- local = AFR_FRAME_INIT (frame, op_errno);
- if (!local)
- goto out;
-
- fd_ctx = afr_fd_ctx_get (fd, this);
- if (!fd_ctx) {
- op_errno = EINVAL;
- goto out;
- }
-
- local->op = whichop;
- local->fd = fd_ref (fd);
- local->cont.readdir.size = size;
- local->cont.readdir.offset = offset;
- local->xdata_req = (dict)? dict_ref (dict) : NULL;
-
- subvol = fd_ctx->readdir_subvol;
-
- if (offset == 0 || subvol == -1) {
- /* First readdir has option of failing over and selecting
- an appropriate read subvolume */
- afr_read_txn (frame, this, fd->inode, afr_readdir_wind,
- AFR_DATA_TRANSACTION);
- } else {
- /* But continued readdirs MUST stick to the same subvolume
- without an option to failover */
- afr_readdir_wind (frame, this, subvol);
- }
-
- return 0;
+ afr_local_t *local = NULL;
+ int32_t op_errno = 0;
+ int subvol = -1;
+ afr_fd_ctx_t *fd_ctx = NULL;
+
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
+
+ fd_ctx = afr_fd_ctx_get(fd, this);
+ if (!fd_ctx) {
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ local->op = whichop;
+ local->fd = fd_ref(fd);
+ local->cont.readdir.size = size;
+ local->cont.readdir.offset = offset;
+ local->xdata_req = (dict) ? dict_ref(dict) : NULL;
+
+ subvol = fd_ctx->readdir_subvol;
+
+ if (offset == 0 || subvol == -1) {
+ /* First readdir has option of failing over and selecting
+ an appropriate read subvolume */
+ afr_read_txn(frame, this, fd->inode, afr_readdir_wind,
+ AFR_DATA_TRANSACTION);
+ } else {
+ /* But continued readdirs MUST stick to the same subvolume
+ without an option to failover */
+ afr_readdir_wind(frame, this, subvol);
+ }
+
+ return 0;
out:
- AFR_STACK_UNWIND (readdir, frame, -1, op_errno, NULL, NULL);
- return 0;
+ AFR_STACK_UNWIND(readdir, frame, -1, op_errno, NULL, NULL);
+ return 0;
}
-
int32_t
-afr_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset, dict_t *xdata)
+afr_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *xdata)
{
- afr_do_readdir (frame, this, fd, size, offset, GF_FOP_READDIR, xdata);
+ afr_do_readdir(frame, this, fd, size, offset, GF_FOP_READDIR, xdata);
- return 0;
+ return 0;
}
-
int32_t
-afr_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset, dict_t *dict)
+afr_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *dict)
{
- afr_do_readdir (frame, this, fd, size, offset, GF_FOP_READDIRP, dict);
+ afr_do_readdir(frame, this, fd, size, offset, GF_FOP_READDIRP, dict);
- return 0;
+ return 0;
}
-
int32_t
-afr_releasedir (xlator_t *this, fd_t *fd)
+afr_releasedir(xlator_t *this, fd_t *fd)
{
- afr_cleanup_fd_ctx (this, fd);
+ afr_cleanup_fd_ctx(this, fd);
- return 0;
+ return 0;
}
diff --git a/xlators/cluster/afr/src/afr-dir-read.h b/xlators/cluster/afr/src/afr-dir-read.h
index 09456d15949..773e925ec6c 100644
--- a/xlators/cluster/afr/src/afr-dir-read.h
+++ b/xlators/cluster/afr/src/afr-dir-read.h
@@ -11,26 +11,23 @@
#ifndef __DIR_READ_H__
#define __DIR_READ_H__
-
int32_t
-afr_opendir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, fd_t *fd, dict_t *xdata);
+afr_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata);
int32_t
-afr_releasedir (xlator_t *this, fd_t *fd);
+afr_releasedir(xlator_t *this, fd_t *fd);
int32_t
-afr_readdir (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset, dict_t *xdata);
-
+afr_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *xdata);
int32_t
-afr_readdirp (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset, dict_t *dict);
+afr_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *dict);
int32_t
-afr_checksum (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, dict_t *xdata);
-
+afr_checksum(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ dict_t *xdata);
#endif /* __DIR_READ_H__ */
diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c
index f7ca9108092..b7cceb79158 100644
--- a/xlators/cluster/afr/src/afr-dir-write.c
+++ b/xlators/cluster/afr/src/afr-dir-write.c
@@ -8,506 +8,493 @@
cases as published by the Free Software Foundation.
*/
-
#include <libgen.h>
#include <unistd.h>
-#include <fnmatch.h>
#include <sys/time.h>
#include <stdlib.h>
#include <signal.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
+#include <glusterfs/glusterfs.h>
#include "afr.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "list.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
-#include "byte-order.h"
+#include <glusterfs/dict.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/list.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/byte-order.h>
#include "afr.h"
#include "afr-transaction.h"
void
-afr_mark_entry_pending_changelog (call_frame_t *frame, xlator_t *this);
+afr_mark_entry_pending_changelog(call_frame_t *frame, xlator_t *this);
int
-afr_build_parent_loc (loc_t *parent, loc_t *child, int32_t *op_errno)
+afr_build_parent_loc(loc_t *parent, loc_t *child, int32_t *op_errno)
{
- int ret = -1;
- char *child_path = NULL;
+ int ret = -1;
+ char *child_path = NULL;
+
+ if (!child->parent) {
+ if (op_errno)
+ *op_errno = EINVAL;
+ goto out;
+ }
+
+ child_path = gf_strdup(child->path);
+ if (!child_path) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ parent->path = gf_strdup(dirname(child_path));
+ if (!parent->path) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ parent->inode = inode_ref(child->parent);
+ gf_uuid_copy(parent->gfid, child->pargfid);
+
+ ret = 0;
+out:
+ GF_FREE(child_path);
- if (!child->parent) {
- if (op_errno)
- *op_errno = EINVAL;
- goto out;
- }
+ return ret;
+}
- child_path = gf_strdup (child->path);
- if (!child_path) {
- if (op_errno)
- *op_errno = ENOMEM;
- goto out;
+static void
+__afr_dir_write_finalize(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int inode_read_subvol = -1;
+ int parent_read_subvol = -1;
+ int parent2_read_subvol = -1;
+ int i = 0;
+ afr_read_subvol_args_t args = {
+ 0,
+ };
+
+ local = frame->local;
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
+ if (local->replies[i].op_ret == -1)
+ continue;
+ gf_uuid_copy(args.gfid, local->replies[i].poststat.ia_gfid);
+ args.ia_type = local->replies[i].poststat.ia_type;
+ break;
+ }
+
+ if (local->inode) {
+ if (local->op != GF_FOP_RENAME && local->op != GF_FOP_LINK)
+ afr_replies_interpret(frame, this, local->inode, NULL);
+
+ inode_read_subvol = afr_data_subvol_get(local->inode, this, NULL, NULL,
+ NULL, &args);
+ }
+
+ if (local->parent)
+ parent_read_subvol = afr_data_subvol_get(local->parent, this, NULL,
+ local->readable, NULL, NULL);
+
+ if (local->parent2)
+ parent2_read_subvol = afr_data_subvol_get(local->parent2, this, NULL,
+ local->readable2, NULL, NULL);
+
+ local->op_ret = -1;
+ local->op_errno = afr_final_errno(local, priv);
+ afr_pick_error_xdata(local, priv, local->parent, local->readable,
+ local->parent2, local->readable2);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
+ if (local->replies[i].op_ret < 0) {
+ if (local->inode)
+ afr_inode_need_refresh_set(local->inode, this);
+ if (local->parent)
+ afr_inode_need_refresh_set(local->parent, this);
+ if (local->parent2)
+ afr_inode_need_refresh_set(local->parent2, this);
+ continue;
}
- parent->path = gf_strdup (dirname (child_path));
- if (!parent->path) {
- if (op_errno)
- *op_errno = ENOMEM;
- goto out;
+ if (local->op_ret == -1) {
+ local->op_ret = local->replies[i].op_ret;
+ local->op_errno = local->replies[i].op_errno;
+
+ local->cont.dir_fop.buf = local->replies[i].poststat;
+ local->cont.dir_fop.preparent = local->replies[i].preparent;
+ local->cont.dir_fop.postparent = local->replies[i].postparent;
+ local->cont.dir_fop.prenewparent = local->replies[i].preparent2;
+ local->cont.dir_fop.postnewparent = local->replies[i].postparent2;
+ if (local->xdata_rsp) {
+ dict_unref(local->xdata_rsp);
+ local->xdata_rsp = NULL;
+ }
+
+ if (local->replies[i].xdata)
+ local->xdata_rsp = dict_ref(local->replies[i].xdata);
+ continue;
}
- parent->inode = inode_ref (child->parent);
- gf_uuid_copy (parent->gfid, child->pargfid);
-
- ret = 0;
-out:
- GF_FREE (child_path);
-
- return ret;
-}
+ if (i == inode_read_subvol) {
+ local->cont.dir_fop.buf = local->replies[i].poststat;
+ if (local->replies[i].xdata) {
+ if (local->xdata_rsp)
+ dict_unref(local->xdata_rsp);
+ local->xdata_rsp = dict_ref(local->replies[i].xdata);
+ }
+ }
+ if (i == parent_read_subvol) {
+ local->cont.dir_fop.preparent = local->replies[i].preparent;
+ local->cont.dir_fop.postparent = local->replies[i].postparent;
+ }
-static void
-__afr_dir_write_finalize (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int inode_read_subvol = -1;
- int parent_read_subvol = -1;
- int parent2_read_subvol = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- if (local->inode) {
- afr_replies_interpret (frame, this, local->inode);
- inode_read_subvol = afr_data_subvol_get (local->inode, this,
- NULL, NULL);
- }
- if (local->parent)
- parent_read_subvol = afr_data_subvol_get (local->parent, this,
- NULL, NULL);
- if (local->parent2)
- parent2_read_subvol = afr_data_subvol_get (local->parent2, this,
- NULL, NULL);
-
- local->op_ret = -1;
- local->op_errno = afr_final_errno (local, priv);
-
- for (i = 0; i < priv->child_count; i++) {
- if (!local->replies[i].valid)
- continue;
- if (local->replies[i].op_ret < 0) {
- if (local->inode)
- afr_inode_read_subvol_reset (local->inode,
- this);
- if (local->parent)
- afr_inode_read_subvol_reset (local->parent,
- this);
- if (local->parent2)
- afr_inode_read_subvol_reset (local->parent2,
- this);
- continue;
- }
-
- if (local->op_ret == -1) {
- local->op_ret = local->replies[i].op_ret;
- local->op_errno = local->replies[i].op_errno;
-
- local->cont.dir_fop.buf =
- local->replies[i].poststat;
- local->cont.dir_fop.preparent =
- local->replies[i].preparent;
- local->cont.dir_fop.postparent =
- local->replies[i].postparent;
- local->cont.dir_fop.prenewparent =
- local->replies[i].preparent2;
- local->cont.dir_fop.postnewparent =
- local->replies[i].postparent2;
- if (local->replies[i].xdata)
- local->xdata_rsp =
- dict_ref (local->replies[i].xdata);
- continue;
- }
-
- if (i == inode_read_subvol) {
- local->cont.dir_fop.buf =
- local->replies[i].poststat;
- if (local->replies[i].xdata) {
- if (local->xdata_rsp)
- dict_unref (local->xdata_rsp);
- local->xdata_rsp =
- dict_ref (local->replies[i].xdata);
- }
- }
-
- if (i == parent_read_subvol) {
- local->cont.dir_fop.preparent =
- local->replies[i].preparent;
- local->cont.dir_fop.postparent =
- local->replies[i].postparent;
- }
-
- if (i == parent2_read_subvol) {
- local->cont.dir_fop.prenewparent =
- local->replies[i].preparent2;
- local->cont.dir_fop.postnewparent =
- local->replies[i].postparent2;
- }
- }
+ if (i == parent2_read_subvol) {
+ local->cont.dir_fop.prenewparent = local->replies[i].preparent2;
+ local->cont.dir_fop.postnewparent = local->replies[i].postparent2;
+ }
+ }
}
-
static void
-__afr_dir_write_fill (call_frame_t *frame, xlator_t *this, int child_index,
- int op_ret, int op_errno, struct iatt *poststat,
- struct iatt *preparent, struct iatt *postparent,
- struct iatt *preparent2, struct iatt *postparent2,
- dict_t *xdata)
+__afr_dir_write_fill(call_frame_t *frame, xlator_t *this, int child_index,
+ int op_ret, int op_errno, struct iatt *poststat,
+ struct iatt *preparent, struct iatt *postparent,
+ struct iatt *preparent2, struct iatt *postparent2,
+ dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_fd_ctx_t *fd_ctx = NULL;
-
- local = frame->local;
- fd_ctx = local->fd_ctx;
-
- local->replies[child_index].valid = 1;
- local->replies[child_index].op_ret = op_ret;
- local->replies[child_index].op_errno = op_errno;
-
- if (op_ret >= 0) {
- if (poststat)
- local->replies[child_index].poststat = *poststat;
- if (preparent)
- local->replies[child_index].preparent = *preparent;
- if (postparent)
- local->replies[child_index].postparent = *postparent;
- if (preparent2)
- local->replies[child_index].preparent2 = *preparent2;
- if (postparent2)
- local->replies[child_index].postparent2 = *postparent2;
- if (xdata)
- local->replies[child_index].xdata = dict_ref (xdata);
-
- if (fd_ctx)
- fd_ctx->opened_on[child_index] = AFR_FD_OPENED;
- } else {
- if (op_errno != ENOTEMPTY)
- afr_transaction_fop_failed (frame, this, child_index);
- if (fd_ctx)
- fd_ctx->opened_on[child_index] = AFR_FD_NOT_OPENED;
- }
-
- return;
+ afr_local_t *local = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+
+ local = frame->local;
+ fd_ctx = local->fd_ctx;
+
+ local->replies[child_index].valid = 1;
+ local->replies[child_index].op_ret = op_ret;
+ local->replies[child_index].op_errno = op_errno;
+ if (xdata)
+ local->replies[child_index].xdata = dict_ref(xdata);
+
+ if (op_ret >= 0) {
+ if (poststat)
+ local->replies[child_index].poststat = *poststat;
+ if (preparent)
+ local->replies[child_index].preparent = *preparent;
+ if (postparent)
+ local->replies[child_index].postparent = *postparent;
+ if (preparent2)
+ local->replies[child_index].preparent2 = *preparent2;
+ if (postparent2)
+ local->replies[child_index].postparent2 = *postparent2;
+ if (fd_ctx)
+ fd_ctx->opened_on[child_index] = AFR_FD_OPENED;
+ } else {
+ if (op_errno != ENOTEMPTY)
+ afr_transaction_fop_failed(frame, this, child_index);
+ if (fd_ctx)
+ fd_ctx->opened_on[child_index] = AFR_FD_NOT_OPENED;
+ }
+
+ return;
}
-
static int
-__afr_dir_write_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent,
- struct iatt *preparent2, struct iatt *postparent2,
- dict_t *xdata)
+__afr_dir_write_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ struct iatt *preparent2, struct iatt *postparent2,
+ dict_t *xdata)
{
- afr_local_t *local = NULL;
- int child_index = (long) cookie;
- int call_count = -1;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- __afr_dir_write_fill (frame, this, child_index, op_ret,
- op_errno, buf, preparent, postparent,
- preparent2, postparent2, xdata);
- }
- UNLOCK (&frame->lock);
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- __afr_dir_write_finalize (frame, this);
-
- if (afr_txn_nothing_failed (frame, this))
- local->transaction.unwind (frame, this);
+ afr_local_t *local = NULL;
+ int child_index = (long)cookie;
+ int call_count = -1;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ LOCK(&frame->lock);
+ {
+ __afr_dir_write_fill(frame, this, child_index, op_ret, op_errno, buf,
+ preparent, postparent, preparent2, postparent2,
+ xdata);
+ call_count = --local->call_count;
+ }
+ UNLOCK(&frame->lock);
+
+ if (call_count == 0) {
+ __afr_dir_write_finalize(frame, this);
+
+ if (afr_txn_nothing_failed(frame, this)) {
+ /*if it did pre-op, it will do post-op changing ctime*/
+ if (priv->consistent_metadata && afr_needs_changelog_update(local))
+ afr_zero_fill_stat(local);
+ local->transaction.unwind(frame, this);
+ }
- afr_mark_entry_pending_changelog (frame, this);
+ afr_mark_entry_pending_changelog(frame, this);
- local->transaction.resume (frame, this);
- }
+ afr_transaction_resume(frame, this);
+ }
- return 0;
+ return 0;
}
-
int
-afr_mark_new_entry_changelog_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int op_ret, int op_errno,
- dict_t *xattr, dict_t *xdata)
+afr_mark_new_entry_changelog_cbk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ dict_t *xattr, dict_t *xdata)
{
- int call_count = 0;
+ int call_count = 0;
- call_count = afr_frame_return (frame);
+ call_count = afr_frame_return(frame);
- if (call_count == 0)
- AFR_STACK_DESTROY (frame);
+ if (call_count == 0)
+ AFR_STACK_DESTROY(frame);
- return 0;
+ return 0;
}
-
void
-afr_mark_new_entry_changelog (call_frame_t *frame, xlator_t *this)
+afr_mark_new_entry_changelog(call_frame_t *frame, xlator_t *this)
{
- call_frame_t *new_frame = NULL;
- afr_local_t *local = NULL;
- afr_local_t *new_local = NULL;
- afr_private_t *priv = NULL;
- dict_t *xattr = NULL;
- int32_t **changelog = NULL;
- int i = 0;
- int op_errno = ENOMEM;
- unsigned char *pending = NULL;
- int call_count = 0;
-
- local = frame->local;
- priv = this->private;
-
- new_frame = copy_frame (frame);
- if (!new_frame)
- goto out;
-
- new_local = AFR_FRAME_INIT (new_frame, op_errno);
- if (!new_local)
- goto out;
-
- xattr = dict_new ();
- if (!xattr)
- goto out;
-
- pending = alloca0 (priv->child_count);
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i] &&
- !local->transaction.failed_subvols[i]) {
- call_count ++;
- continue;
- }
- pending[i] = 1;
- }
-
- changelog = afr_mark_pending_changelog (priv, pending, xattr,
- local->cont.dir_fop.buf.ia_type);
- if (!changelog)
- goto out;
-
- new_local->pending = changelog;
- gf_uuid_copy (new_local->loc.gfid, local->cont.dir_fop.buf.ia_gfid);
- new_local->loc.inode = inode_ref (local->inode);
-
- new_local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (pending[i])
- continue;
-
- STACK_WIND_COOKIE (new_frame, afr_mark_new_entry_changelog_cbk,
- (void *) (long) i, priv->children[i],
- priv->children[i]->fops->xattrop,
- &new_local->loc, GF_XATTROP_ADD_ARRAY,
- xattr, NULL);
- if (!--call_count)
- break;
+ call_frame_t *new_frame = NULL;
+ afr_local_t *local = NULL;
+ afr_local_t *new_local = NULL;
+ afr_private_t *priv = NULL;
+ dict_t *xattr = NULL;
+ int32_t **changelog = NULL;
+ int i = 0;
+ int op_errno = ENOMEM;
+ unsigned char *pending = NULL;
+ int call_count = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ new_frame = copy_frame(frame);
+ if (!new_frame)
+ goto out;
+
+ new_local = AFR_FRAME_INIT(new_frame, op_errno);
+ if (!new_local)
+ goto out;
+
+ xattr = dict_new();
+ if (!xattr)
+ goto out;
+
+ pending = alloca0(priv->child_count);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i] &&
+ !local->transaction.failed_subvols[i]) {
+ call_count++;
+ continue;
}
+ pending[i] = 1;
+ }
+
+ changelog = afr_mark_pending_changelog(priv, pending, xattr,
+ local->cont.dir_fop.buf.ia_type);
+ if (!changelog)
+ goto out;
+
+ new_local->pending = changelog;
+ gf_uuid_copy(new_local->loc.gfid, local->cont.dir_fop.buf.ia_gfid);
+ new_local->loc.inode = inode_ref(local->inode);
+
+ new_local->call_count = call_count;
- new_frame = NULL;
+ for (i = 0; i < priv->child_count; i++) {
+ if (pending[i])
+ continue;
+
+ STACK_WIND_COOKIE(new_frame, afr_mark_new_entry_changelog_cbk,
+ (void *)(long)i, priv->children[i],
+ priv->children[i]->fops->xattrop, &new_local->loc,
+ GF_XATTROP_ADD_ARRAY, xattr, NULL);
+ if (!--call_count)
+ break;
+ }
+
+ new_frame = NULL;
out:
- if (new_frame)
- AFR_STACK_DESTROY (new_frame);
- if (xattr)
- dict_unref (xattr);
- return;
+ if (new_frame)
+ AFR_STACK_DESTROY(new_frame);
+ if (xattr)
+ dict_unref(xattr);
+ return;
}
-
void
-afr_mark_entry_pending_changelog (call_frame_t *frame, xlator_t *this)
+afr_mark_entry_pending_changelog(call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int pre_op_count = 0;
- int failed_count = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int pre_op_count = 0;
+ int failed_count = 0;
+ unsigned char *success_replies = NULL;
- local = frame->local;
- priv = this->private;
+ local = frame->local;
+ priv = this->private;
- if (local->op_ret < 0)
- return;
+ if (local->op_ret < 0)
+ return;
- if (local->op != GF_FOP_CREATE && local->op != GF_FOP_MKNOD)
- return;
+ if (local->op != GF_FOP_CREATE && local->op != GF_FOP_MKNOD &&
+ local->op != GF_FOP_MKDIR)
+ return;
- pre_op_count = AFR_COUNT (local->transaction.pre_op, priv->child_count);
- failed_count = AFR_COUNT (local->transaction.failed_subvols,
- priv->child_count);
+ pre_op_count = AFR_COUNT(local->transaction.pre_op, priv->child_count);
+ failed_count = AFR_COUNT(local->transaction.failed_subvols,
+ priv->child_count);
- if (pre_op_count == priv->child_count && !failed_count)
- return;
+ /* FOP succeeded on all bricks. */
+ if (pre_op_count == priv->child_count && !failed_count)
+ return;
- afr_mark_new_entry_changelog (frame, this);
+ /* FOP did not suceed on quorum no. of bricks. */
+ success_replies = alloca0(priv->child_count);
+ afr_fill_success_replies(local, priv, success_replies);
+ if (!afr_has_quorum(success_replies, this, NULL))
+ return;
+ if (priv->thin_arbiter_count) {
+ /*Mark new entry using ta file*/
+ local->is_new_entry = _gf_true;
return;
-}
+ }
+
+ afr_mark_new_entry_changelog(frame, this);
+ return;
+}
/* {{{ create */
int
-afr_create_unwind (call_frame_t *frame, xlator_t *this)
+afr_create_unwind(call_frame_t *frame, xlator_t *this)
{
- call_frame_t *main_frame = NULL;
- afr_local_t *local = NULL;
-
- local = frame->local;
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
- main_frame = afr_transaction_detach_fop_frame (frame);
+ local = frame->local;
- if (!main_frame)
- return 0;
+ main_frame = afr_transaction_detach_fop_frame(frame);
- AFR_STACK_UNWIND (create, main_frame, local->op_ret, local->op_errno,
- local->cont.create.fd, local->inode,
- &local->cont.dir_fop.buf,
- &local->cont.dir_fop.preparent,
- &local->cont.dir_fop.postparent, local->xdata_rsp);
+ if (!main_frame)
return 0;
-}
+ AFR_STACK_UNWIND(create, main_frame, local->op_ret, local->op_errno,
+ local->cont.create.fd, local->inode,
+ &local->cont.dir_fop.buf, &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent, local->xdata_rsp);
+ return 0;
+}
int
-afr_create_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- fd_t *fd, inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
+afr_create_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- return __afr_dir_write_cbk (frame, cookie, this, op_ret, op_errno, buf,
- preparent, postparent, NULL, NULL, xdata);
+ return __afr_dir_write_cbk(frame, cookie, this, op_ret, op_errno, buf,
+ preparent, postparent, NULL, NULL, xdata);
}
-
int
-afr_create_wind (call_frame_t *frame, xlator_t *this, int subvol)
+afr_create_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- local = frame->local;
- priv = this->private;
-
- STACK_WIND_COOKIE (frame, afr_create_wind_cbk, (void *) (long) subvol,
- priv->children[subvol],
- priv->children[subvol]->fops->create,
- &local->loc, local->cont.create.flags,
- local->cont.create.mode, local->umask,
- local->cont.create.fd, local->xdata_req);
- return 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ STACK_WIND_COOKIE(frame, afr_create_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->create, &local->loc,
+ local->cont.create.flags, local->cont.create.mode,
+ local->umask, local->cont.create.fd, local->xdata_req);
+ return 0;
}
-
int
-afr_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+afr_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_internal_lock_t *int_lock = NULL;
- call_frame_t *transaction_frame = NULL;
- int ret = -1;
- int op_errno = ENOMEM;
-
- priv = this->private;
-
- transaction_frame = copy_frame (frame);
- if (!transaction_frame)
- goto out;
-
- local = AFR_FRAME_INIT (transaction_frame, op_errno);
- if (!local)
- goto out;
-
- loc_copy (&local->loc, loc);
-
- local->fd_ctx = afr_fd_ctx_get (fd, this);
- if (!local->fd_ctx)
- goto out;
-
- local->inode = inode_ref (loc->inode);
- local->parent = inode_ref (loc->parent);
-
- local->op = GF_FOP_CREATE;
- local->cont.create.flags = flags;
- local->cont.create.mode = mode;
- local->cont.create.fd = fd_ref (fd);
- local->umask = umask;
-
- if (xdata)
- local->xdata_req = dict_copy_with_ref (xdata, NULL);
- else
- local->xdata_req = dict_new ();
-
- if (!local->xdata_req)
- goto out;
-
- local->transaction.wind = afr_create_wind;
- local->transaction.fop = __afr_txn_write_fop;
- local->transaction.done = __afr_txn_write_done;
- local->transaction.unwind = afr_create_unwind;
-
- ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
- &op_errno);
- if (ret)
- goto out;
-
- local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (loc->path);
- int_lock = &local->internal_lock;
-
- int_lock->lockee_count = 0;
- ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
- &local->transaction.parent_loc,
- local->transaction.basename,
- priv->child_count);
- if (ret)
- goto out;
-
- int_lock->lockee_count++;
- ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- return 0;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
+
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
+
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
+
+ loc_copy(&local->loc, loc);
+
+ local->fd_ctx = afr_fd_ctx_get(fd, this);
+ if (!local->fd_ctx)
+ goto out;
+
+ local->inode = inode_ref(loc->inode);
+ local->parent = inode_ref(loc->parent);
+
+ local->op = GF_FOP_CREATE;
+ local->cont.create.flags = flags;
+ local->fd_ctx->flags = flags;
+ local->cont.create.mode = mode;
+ local->cont.create.fd = fd_ref(fd);
+ local->umask = umask;
+
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
+
+ if (!local->xdata_req)
+ goto out;
+
+ local->transaction.wind = afr_create_wind;
+ local->transaction.unwind = afr_create_unwind;
+
+ ret = afr_build_parent_loc(&local->transaction.parent_loc, loc, &op_errno);
+ if (ret)
+ goto out;
+
+ local->transaction.main_frame = frame;
+ local->transaction.basename = AFR_BASENAME(loc->path);
+ ret = afr_transaction(transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ return 0;
out:
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- AFR_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL, NULL,
- NULL, NULL);
- return 0;
+ AFR_STACK_UNWIND(create, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
}
/* }}} */
@@ -515,518 +502,436 @@ out:
/* {{{ mknod */
int
-afr_mknod_unwind (call_frame_t *frame, xlator_t *this)
+afr_mknod_unwind(call_frame_t *frame, xlator_t *this)
{
- call_frame_t *main_frame = NULL;
- afr_local_t *local = NULL;
-
- local = frame->local;
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
- main_frame = afr_transaction_detach_fop_frame (frame);
- if (!main_frame)
- return 0;
+ local = frame->local;
- AFR_STACK_UNWIND (mknod, main_frame, local->op_ret, local->op_errno,
- local->inode, &local->cont.dir_fop.buf,
- &local->cont.dir_fop.preparent,
- &local->cont.dir_fop.postparent, local->xdata_rsp);
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
-}
+ AFR_STACK_UNWIND(mknod, main_frame, local->op_ret, local->op_errno,
+ local->inode, &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent, local->xdata_rsp);
+ return 0;
+}
int
-afr_mknod_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+afr_mknod_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- return __afr_dir_write_cbk (frame, cookie, this, op_ret, op_errno, buf,
- preparent, postparent, NULL, NULL, xdata);
+ return __afr_dir_write_cbk(frame, cookie, this, op_ret, op_errno, buf,
+ preparent, postparent, NULL, NULL, xdata);
}
-
int
-afr_mknod_wind (call_frame_t *frame, xlator_t *this, int subvol)
+afr_mknod_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- local = frame->local;
- priv = this->private;
-
- STACK_WIND_COOKIE (frame, afr_mknod_wind_cbk, (void *) (long) subvol,
- priv->children[subvol],
- priv->children[subvol]->fops->mknod,
- &local->loc, local->cont.mknod.mode,
- local->cont.mknod.dev, local->umask,
- local->xdata_req);
- return 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ STACK_WIND_COOKIE(frame, afr_mknod_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->mknod, &local->loc,
+ local->cont.mknod.mode, local->cont.mknod.dev,
+ local->umask, local->xdata_req);
+ return 0;
}
int
-afr_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dev_t dev, mode_t umask, dict_t *xdata)
+afr_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t dev, mode_t umask, dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_internal_lock_t *int_lock = NULL;
- call_frame_t *transaction_frame = NULL;
- int ret = -1;
- int op_errno = ENOMEM;
-
- priv = this->private;
-
- transaction_frame = copy_frame (frame);
- if (!transaction_frame)
- goto out;
-
- local = AFR_FRAME_INIT (transaction_frame, op_errno);
- if (!local)
- goto out;
-
- loc_copy (&local->loc, loc);
- local->inode = inode_ref (loc->inode);
- local->parent = inode_ref (loc->parent);
-
- local->op = GF_FOP_MKNOD;
- local->cont.mknod.mode = mode;
- local->cont.mknod.dev = dev;
- local->umask = umask;
-
- if (xdata)
- local->xdata_req = dict_copy_with_ref (xdata, NULL);
- else
- local->xdata_req = dict_new ();
-
- if (!local->xdata_req)
- goto out;
-
- local->transaction.wind = afr_mknod_wind;
- local->transaction.fop = __afr_txn_write_fop;
- local->transaction.done = __afr_txn_write_done;
- local->transaction.unwind = afr_mknod_unwind;
-
- ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
- &op_errno);
- if (ret)
- goto out;
-
- local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (loc->path);
- int_lock = &local->internal_lock;
-
- int_lock->lockee_count = 0;
- ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
- &local->transaction.parent_loc,
- local->transaction.basename,
- priv->child_count);
- if (ret)
- goto out;
-
- int_lock->lockee_count++;
- ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- return 0;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
+
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
+
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
+
+ loc_copy(&local->loc, loc);
+ local->inode = inode_ref(loc->inode);
+ local->parent = inode_ref(loc->parent);
+
+ local->op = GF_FOP_MKNOD;
+ local->cont.mknod.mode = mode;
+ local->cont.mknod.dev = dev;
+ local->umask = umask;
+
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
+
+ if (!local->xdata_req)
+ goto out;
+
+ local->transaction.wind = afr_mknod_wind;
+ local->transaction.unwind = afr_mknod_unwind;
+
+ ret = afr_build_parent_loc(&local->transaction.parent_loc, loc, &op_errno);
+ if (ret)
+ goto out;
+
+ local->transaction.main_frame = frame;
+ local->transaction.basename = AFR_BASENAME(loc->path);
+ ret = afr_transaction(transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ return 0;
out:
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- AFR_STACK_UNWIND (mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL,
- NULL);
- return 0;
+ AFR_STACK_UNWIND(mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
+ return 0;
}
/* }}} */
/* {{{ mkdir */
-
int
-afr_mkdir_unwind (call_frame_t *frame, xlator_t *this)
+afr_mkdir_unwind(call_frame_t *frame, xlator_t *this)
{
- call_frame_t *main_frame = NULL;
- afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
- local = frame->local;
+ local = frame->local;
- main_frame = afr_transaction_detach_fop_frame (frame);
- if (!main_frame)
- return 0;
-
- AFR_STACK_UNWIND (mkdir, main_frame, local->op_ret, local->op_errno,
- local->inode, &local->cont.dir_fop.buf,
- &local->cont.dir_fop.preparent,
- &local->cont.dir_fop.postparent, local->xdata_rsp);
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
-}
+ AFR_STACK_UNWIND(mkdir, main_frame, local->op_ret, local->op_errno,
+ local->inode, &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent, local->xdata_rsp);
+ return 0;
+}
int
-afr_mkdir_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+afr_mkdir_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- return __afr_dir_write_cbk (frame, cookie, this, op_ret, op_errno, buf,
- preparent, postparent, NULL, NULL, xdata);
+ return __afr_dir_write_cbk(frame, cookie, this, op_ret, op_errno, buf,
+ preparent, postparent, NULL, NULL, xdata);
}
-
int
-afr_mkdir_wind (call_frame_t *frame, xlator_t *this, int subvol)
+afr_mkdir_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- local = frame->local;
- priv = this->private;
+ local = frame->local;
+ priv = this->private;
- STACK_WIND_COOKIE (frame, afr_mkdir_wind_cbk, (void *) (long) subvol,
- priv->children[subvol],
- priv->children[subvol]->fops->mkdir, &local->loc,
- local->cont.mkdir.mode, local->umask,
- local->xdata_req);
- return 0;
+ STACK_WIND_COOKIE(frame, afr_mkdir_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->mkdir, &local->loc,
+ local->cont.mkdir.mode, local->umask, local->xdata_req);
+ return 0;
}
-
int
-afr_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- mode_t umask, dict_t *xdata)
+afr_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_internal_lock_t *int_lock = NULL;
- call_frame_t *transaction_frame = NULL;
- int ret = -1;
- int op_errno = ENOMEM;
-
- priv = this->private;
-
- transaction_frame = copy_frame (frame);
- if (!transaction_frame)
- goto out;
-
- local = AFR_FRAME_INIT (transaction_frame, op_errno);
- if (!local)
- goto out;
-
- loc_copy (&local->loc, loc);
- local->inode = inode_ref (loc->inode);
- local->parent = inode_ref (loc->parent);
-
- local->cont.mkdir.mode = mode;
- local->umask = umask;
-
- if (xdata)
- local->xdata_req = dict_copy_with_ref (xdata, NULL);
- else
- local->xdata_req = dict_new ();
-
- if (!local->xdata_req)
- goto out;
-
- local->op = GF_FOP_MKDIR;
- local->transaction.wind = afr_mkdir_wind;
- local->transaction.fop = __afr_txn_write_fop;
- local->transaction.done = __afr_txn_write_done;
- local->transaction.unwind = afr_mkdir_unwind;
-
- ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
- &op_errno);
- if (ret)
- goto out;
-
- local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (loc->path);
- int_lock = &local->internal_lock;
-
- int_lock->lockee_count = 0;
- ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
- &local->transaction.parent_loc,
- local->transaction.basename,
- priv->child_count);
- if (ret)
- goto out;
-
- int_lock->lockee_count++;
- ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- return 0;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
+
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
+
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
+
+ loc_copy(&local->loc, loc);
+ local->inode = inode_ref(loc->inode);
+ local->parent = inode_ref(loc->parent);
+
+ local->cont.mkdir.mode = mode;
+ local->umask = umask;
+
+ if (!xdata || !dict_get_sizen(xdata, "gfid-req")) {
+ op_errno = EPERM;
+ gf_msg_callingfn(this->name, GF_LOG_WARNING, op_errno,
+ AFR_MSG_GFID_NULL,
+ "mkdir: %s is received "
+ "without gfid-req %p",
+ loc->path, xdata);
+ goto out;
+ }
+
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ if (!local->xdata_req) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ local->op = GF_FOP_MKDIR;
+ local->transaction.wind = afr_mkdir_wind;
+ local->transaction.unwind = afr_mkdir_unwind;
+
+ ret = afr_build_parent_loc(&local->transaction.parent_loc, loc, &op_errno);
+ if (ret)
+ goto out;
+
+ local->transaction.main_frame = frame;
+ local->transaction.basename = AFR_BASENAME(loc->path);
+ ret = afr_transaction(transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ return 0;
out:
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- AFR_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL,
- NULL);
- return 0;
+ AFR_STACK_UNWIND(mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
+ return 0;
}
/* }}} */
/* {{{ link */
-
int
-afr_link_unwind (call_frame_t *frame, xlator_t *this)
+afr_link_unwind(call_frame_t *frame, xlator_t *this)
{
- call_frame_t *main_frame = NULL;
- afr_local_t *local = NULL;
-
- local = frame->local;
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
- main_frame = afr_transaction_detach_fop_frame (frame);
- if (!main_frame)
- return 0;
+ local = frame->local;
- AFR_STACK_UNWIND (link, main_frame, local->op_ret, local->op_errno,
- local->inode, &local->cont.dir_fop.buf,
- &local->cont.dir_fop.preparent,
- &local->cont.dir_fop.postparent, local->xdata_rsp);
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
-}
+ AFR_STACK_UNWIND(link, main_frame, local->op_ret, local->op_errno,
+ local->inode, &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent, local->xdata_rsp);
+ return 0;
+}
int
-afr_link_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+afr_link_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- return __afr_dir_write_cbk (frame, cookie, this, op_ret, op_errno, buf,
- preparent, postparent, NULL, NULL, xdata);
+ return __afr_dir_write_cbk(frame, cookie, this, op_ret, op_errno, buf,
+ preparent, postparent, NULL, NULL, xdata);
}
-
int
-afr_link_wind (call_frame_t *frame, xlator_t *this, int subvol)
+afr_link_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- local = frame->local;
- priv = this->private;
+ local = frame->local;
+ priv = this->private;
- STACK_WIND_COOKIE (frame, afr_link_wind_cbk, (void *) (long) subvol,
- priv->children[subvol],
- priv->children[subvol]->fops->link,
- &local->loc, &local->newloc, local->xdata_req);
- return 0;
+ STACK_WIND_COOKIE(frame, afr_link_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->link, &local->loc,
+ &local->newloc, local->xdata_req);
+ return 0;
}
-
int
-afr_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
- dict_t *xdata)
+afr_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_internal_lock_t *int_lock = NULL;
- call_frame_t *transaction_frame = NULL;
- int ret = -1;
- int op_errno = ENOMEM;
-
- priv = this->private;
-
- transaction_frame = copy_frame (frame);
- if (!transaction_frame)
- goto out;
-
- local = AFR_FRAME_INIT (transaction_frame, op_errno);
- if (!local)
- goto out;
-
- loc_copy (&local->loc, oldloc);
- loc_copy (&local->newloc, newloc);
-
- local->inode = inode_ref (oldloc->inode);
- local->parent = inode_ref (newloc->parent);
-
- if (xdata)
- local->xdata_req = dict_copy_with_ref (xdata, NULL);
- else
- local->xdata_req = dict_new ();
-
- if (!local->xdata_req)
- goto out;
-
- local->op = GF_FOP_LINK;
-
- local->transaction.wind = afr_link_wind;
- local->transaction.fop = __afr_txn_write_fop;
- local->transaction.done = __afr_txn_write_done;
- local->transaction.unwind = afr_link_unwind;
-
- ret = afr_build_parent_loc (&local->transaction.parent_loc, newloc,
- &op_errno);
- if (ret)
- goto out;
-
- local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (newloc->path);
- int_lock = &local->internal_lock;
-
- int_lock->lockee_count = 0;
- ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
- &local->transaction.parent_loc,
- local->transaction.basename,
- priv->child_count);
- if (ret)
- goto out;
-
- int_lock->lockee_count++;
- ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
+
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
+
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
+
+ loc_copy(&local->loc, oldloc);
+ loc_copy(&local->newloc, newloc);
- return 0;
+ local->inode = inode_ref(oldloc->inode);
+ local->parent = inode_ref(newloc->parent);
+
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
+
+ if (!local->xdata_req)
+ goto out;
+
+ local->op = GF_FOP_LINK;
+
+ local->transaction.wind = afr_link_wind;
+ local->transaction.unwind = afr_link_unwind;
+
+ ret = afr_build_parent_loc(&local->transaction.parent_loc, newloc,
+ &op_errno);
+ if (ret)
+ goto out;
+
+ local->transaction.main_frame = frame;
+ local->transaction.basename = AFR_BASENAME(newloc->path);
+ ret = afr_transaction(transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ return 0;
out:
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- AFR_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL, NULL, NULL,
- NULL);
- return 0;
+ AFR_STACK_UNWIND(link, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
+ return 0;
}
/* }}} */
/* {{{ symlink */
-
int
-afr_symlink_unwind (call_frame_t *frame, xlator_t *this)
+afr_symlink_unwind(call_frame_t *frame, xlator_t *this)
{
- call_frame_t *main_frame = NULL;
- afr_local_t *local = NULL;
-
- local = frame->local;
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
- main_frame = afr_transaction_detach_fop_frame (frame);
- if (!main_frame)
- return 0;
+ local = frame->local;
- AFR_STACK_UNWIND (symlink, main_frame, local->op_ret, local->op_errno,
- local->inode, &local->cont.dir_fop.buf,
- &local->cont.dir_fop.preparent,
- &local->cont.dir_fop.postparent, local->xdata_rsp);
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
-}
+ AFR_STACK_UNWIND(symlink, main_frame, local->op_ret, local->op_errno,
+ local->inode, &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent, local->xdata_rsp);
+ return 0;
+}
int
-afr_symlink_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+afr_symlink_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- return __afr_dir_write_cbk (frame, cookie, this, op_ret, op_errno, buf,
- preparent, postparent, NULL, NULL, xdata);
+ return __afr_dir_write_cbk(frame, cookie, this, op_ret, op_errno, buf,
+ preparent, postparent, NULL, NULL, xdata);
}
-
int
-afr_symlink_wind (call_frame_t *frame, xlator_t *this, int subvol)
+afr_symlink_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- local = frame->local;
- priv = this->private;
-
- STACK_WIND_COOKIE (frame, afr_symlink_wind_cbk, (void *) (long) subvol,
- priv->children[subvol],
- priv->children[subvol]->fops->symlink,
- local->cont.symlink.linkpath, &local->loc,
- local->umask, local->xdata_req);
- return 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ STACK_WIND_COOKIE(frame, afr_symlink_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->symlink,
+ local->cont.symlink.linkpath, &local->loc, local->umask,
+ local->xdata_req);
+ return 0;
}
-
int
-afr_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
- loc_t *loc, mode_t umask, dict_t *xdata)
+afr_symlink(call_frame_t *frame, xlator_t *this, const char *linkpath,
+ loc_t *loc, mode_t umask, dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_internal_lock_t *int_lock = NULL;
- call_frame_t *transaction_frame = NULL;
- int ret = -1;
- int op_errno = ENOMEM;
-
- priv = this->private;
-
- transaction_frame = copy_frame (frame);
- if (!transaction_frame)
- goto out;
-
- local = AFR_FRAME_INIT (transaction_frame, op_errno);
- if (!local)
- goto out;
-
- loc_copy (&local->loc, loc);
- local->inode = inode_ref (loc->inode);
- local->parent = inode_ref (loc->parent);
-
- local->cont.symlink.linkpath = gf_strdup (linkpath);
- local->umask = umask;
-
- if (xdata)
- local->xdata_req = dict_copy_with_ref (xdata, NULL);
- else
- local->xdata_req = dict_new ();
-
- if (!local->xdata_req)
- goto out;
-
- local->op = GF_FOP_SYMLINK;
- local->transaction.wind = afr_symlink_wind;
- local->transaction.fop = __afr_txn_write_fop;
- local->transaction.done = __afr_txn_write_done;
- local->transaction.unwind = afr_symlink_unwind;
-
- ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
- &op_errno);
- if (ret)
- goto out;
-
- local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (loc->path);
- int_lock = &local->internal_lock;
-
- int_lock->lockee_count = 0;
- ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
- &local->transaction.parent_loc,
- local->transaction.basename,
- priv->child_count);
- if (ret)
- goto out;
-
- int_lock->lockee_count++;
- ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- return 0;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
+
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
+
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
+
+ loc_copy(&local->loc, loc);
+ local->inode = inode_ref(loc->inode);
+ local->parent = inode_ref(loc->parent);
+
+ local->cont.symlink.linkpath = gf_strdup(linkpath);
+ local->umask = umask;
+
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
+
+ if (!local->xdata_req)
+ goto out;
+
+ local->op = GF_FOP_SYMLINK;
+ local->transaction.wind = afr_symlink_wind;
+ local->transaction.unwind = afr_symlink_unwind;
+
+ ret = afr_build_parent_loc(&local->transaction.parent_loc, loc, &op_errno);
+ if (ret)
+ goto out;
+
+ local->transaction.main_frame = frame;
+ local->transaction.basename = AFR_BASENAME(loc->path);
+ ret = afr_transaction(transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ return 0;
out:
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- AFR_STACK_UNWIND (symlink, frame, -1, op_errno, NULL, NULL, NULL,
- NULL, NULL);
- return 0;
+ AFR_STACK_UNWIND(symlink, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
}
/* }}} */
@@ -1034,161 +939,118 @@ out:
/* {{{ rename */
int
-afr_rename_unwind (call_frame_t *frame, xlator_t *this)
+afr_rename_unwind(call_frame_t *frame, xlator_t *this)
{
- call_frame_t *main_frame = NULL;
- afr_local_t *local = NULL;
-
- local = frame->local;
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
- main_frame = afr_transaction_detach_fop_frame (frame);
- if (!main_frame)
- return 0;
+ local = frame->local;
- AFR_STACK_UNWIND (rename, main_frame, local->op_ret, local->op_errno,
- &local->cont.dir_fop.buf,
- &local->cont.dir_fop.preparent,
- &local->cont.dir_fop.postparent,
- &local->cont.dir_fop.prenewparent,
- &local->cont.dir_fop.postnewparent, local->xdata_rsp);
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
-}
+ AFR_STACK_UNWIND(rename, main_frame, local->op_ret, local->op_errno,
+ &local->cont.dir_fop.buf, &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent,
+ &local->cont.dir_fop.prenewparent,
+ &local->cont.dir_fop.postnewparent, local->xdata_rsp);
+ return 0;
+}
int
-afr_rename_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent,
- dict_t *xdata)
+afr_rename_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
- return __afr_dir_write_cbk (frame, cookie, this, op_ret, op_errno, buf,
- preoldparent, postoldparent, prenewparent,
- postnewparent, xdata);
+ return __afr_dir_write_cbk(frame, cookie, this, op_ret, op_errno, buf,
+ preoldparent, postoldparent, prenewparent,
+ postnewparent, xdata);
}
-
int
-afr_rename_wind (call_frame_t *frame, xlator_t *this, int subvol)
+afr_rename_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- local = frame->local;
- priv = this->private;
+ local = frame->local;
+ priv = this->private;
- STACK_WIND_COOKIE (frame, afr_rename_wind_cbk, (void *) (long) subvol,
- priv->children[subvol],
- priv->children[subvol]->fops->rename,
- &local->loc, &local->newloc, local->xdata_req);
- return 0;
+ STACK_WIND_COOKIE(frame, afr_rename_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->rename, &local->loc,
+ &local->newloc, local->xdata_req);
+ return 0;
}
-
int
-afr_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
- dict_t *xdata)
+afr_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_internal_lock_t *int_lock = NULL;
- call_frame_t *transaction_frame = NULL;
- int ret = -1;
- int op_errno = ENOMEM;
- int nlockee = 0;
-
- priv = this->private;
-
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- op_errno = ENOMEM;
- goto out;
- }
-
- local = AFR_FRAME_INIT (transaction_frame, op_errno);
- if (!local)
- goto out;
-
- loc_copy (&local->loc, oldloc);
- loc_copy (&local->newloc, newloc);
-
- local->inode = inode_ref (oldloc->inode);
- local->parent = inode_ref (oldloc->parent);
- local->parent2 = inode_ref (newloc->parent);
-
- if (xdata)
- local->xdata_req = dict_copy_with_ref (xdata, NULL);
- else
- local->xdata_req = dict_new ();
-
- if (!local->xdata_req)
- goto out;
-
- local->op = GF_FOP_RENAME;
- local->transaction.wind = afr_rename_wind;
- local->transaction.fop = __afr_txn_write_fop;
- local->transaction.done = __afr_txn_write_done;
- local->transaction.unwind = afr_rename_unwind;
-
- ret = afr_build_parent_loc (&local->transaction.parent_loc, oldloc,
- &op_errno);
- if (ret)
- goto out;
- ret = afr_build_parent_loc (&local->transaction.new_parent_loc, newloc,
- &op_errno);
- if (ret)
- goto out;
-
- local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (oldloc->path);
- local->transaction.new_basename = AFR_BASENAME (newloc->path);
- int_lock = &local->internal_lock;
-
- int_lock->lockee_count = nlockee = 0;
- ret = afr_init_entry_lockee (&int_lock->lockee[nlockee], local,
- &local->transaction.new_parent_loc,
- local->transaction.new_basename,
- priv->child_count);
- if (ret)
- goto out;
-
- nlockee++;
- ret = afr_init_entry_lockee (&int_lock->lockee[nlockee], local,
- &local->transaction.parent_loc,
- local->transaction.basename,
- priv->child_count);
- if (ret)
- goto out;
-
- nlockee++;
- if (local->newloc.inode && IA_ISDIR (local->newloc.inode->ia_type)) {
- ret = afr_init_entry_lockee (&int_lock->lockee[nlockee], local,
- &local->newloc,
- NULL,
- priv->child_count);
- if (ret)
- goto out;
-
- nlockee++;
- }
- qsort (int_lock->lockee, nlockee, sizeof (*int_lock->lockee),
- afr_entry_lockee_cmp);
- int_lock->lockee_count = nlockee;
-
- ret = afr_transaction (transaction_frame, this, AFR_ENTRY_RENAME_TRANSACTION);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- return 0;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
+
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
+
+ loc_copy(&local->loc, oldloc);
+ loc_copy(&local->newloc, newloc);
+
+ local->inode = inode_ref(oldloc->inode);
+ local->parent = inode_ref(oldloc->parent);
+ local->parent2 = inode_ref(newloc->parent);
+
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
+
+ if (!local->xdata_req)
+ goto out;
+
+ local->op = GF_FOP_RENAME;
+ local->transaction.wind = afr_rename_wind;
+ local->transaction.unwind = afr_rename_unwind;
+
+ ret = afr_build_parent_loc(&local->transaction.parent_loc, oldloc,
+ &op_errno);
+ if (ret)
+ goto out;
+ ret = afr_build_parent_loc(&local->transaction.new_parent_loc, newloc,
+ &op_errno);
+ if (ret)
+ goto out;
+
+ local->transaction.main_frame = frame;
+ local->transaction.basename = AFR_BASENAME(oldloc->path);
+ local->transaction.new_basename = AFR_BASENAME(newloc->path);
+ ret = afr_transaction(transaction_frame, this,
+ AFR_ENTRY_RENAME_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ return 0;
out:
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- AFR_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL, NULL, NULL,
- NULL, NULL);
- return 0;
+ AFR_STACK_UNWIND(rename, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
}
/* }}} */
@@ -1196,263 +1058,205 @@ out:
/* {{{ unlink */
int
-afr_unlink_unwind (call_frame_t *frame, xlator_t *this)
+afr_unlink_unwind(call_frame_t *frame, xlator_t *this)
{
- call_frame_t *main_frame = NULL;
- afr_local_t *local = NULL;
-
- local = frame->local;
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
- main_frame = afr_transaction_detach_fop_frame (frame);
- if (!main_frame)
- return 0;
+ local = frame->local;
- AFR_STACK_UNWIND (unlink, main_frame, local->op_ret, local->op_errno,
- &local->cont.dir_fop.preparent,
- &local->cont.dir_fop.postparent, local->xdata_rsp);
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
-}
+ AFR_STACK_UNWIND(unlink, main_frame, local->op_ret, local->op_errno,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent, local->xdata_rsp);
+ return 0;
+}
int
-afr_unlink_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+afr_unlink_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- return __afr_dir_write_cbk (frame, cookie, this, op_ret, op_errno, NULL,
- preparent, postparent, NULL, NULL, xdata);
+ return __afr_dir_write_cbk(frame, cookie, this, op_ret, op_errno, NULL,
+ preparent, postparent, NULL, NULL, xdata);
}
-
int
-afr_unlink_wind (call_frame_t *frame, xlator_t *this, int subvol)
+afr_unlink_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- local = frame->local;
- priv = this->private;
+ local = frame->local;
+ priv = this->private;
- STACK_WIND_COOKIE (frame, afr_unlink_wind_cbk, (void *) (long) subvol,
- priv->children[subvol],
- priv->children[subvol]->fops->unlink,
- &local->loc, local->xflag, local->xdata_req);
- return 0;
+ STACK_WIND_COOKIE(frame, afr_unlink_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->unlink, &local->loc,
+ local->xflag, local->xdata_req);
+ return 0;
}
-
int
-afr_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
- dict_t *xdata)
+afr_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_internal_lock_t *int_lock = NULL;
- call_frame_t *transaction_frame = NULL;
- int ret = -1;
- int op_errno = ENOMEM;
-
- priv = this->private;
-
- transaction_frame = copy_frame (frame);
- if (!transaction_frame)
- goto out;
-
- local = AFR_FRAME_INIT (transaction_frame, op_errno);
- if (!local)
- goto out;
-
- loc_copy (&local->loc, loc);
- local->xflag = xflag;
-
- local->inode = inode_ref (loc->inode);
- local->parent = inode_ref (loc->parent);
-
- if (xdata)
- local->xdata_req = dict_copy_with_ref (xdata, NULL);
- else
- local->xdata_req = dict_new ();
-
- if (!local->xdata_req)
- goto out;
-
- local->op = GF_FOP_UNLINK;
- local->transaction.wind = afr_unlink_wind;
- local->transaction.fop = __afr_txn_write_fop;
- local->transaction.done = __afr_txn_write_done;
- local->transaction.unwind = afr_unlink_unwind;
-
- ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
- &op_errno);
- if (ret)
- goto out;
-
- local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (loc->path);
- int_lock = &local->internal_lock;
-
- int_lock->lockee_count = 0;
- ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
- &local->transaction.parent_loc,
- local->transaction.basename,
- priv->child_count);
- if (ret)
- goto out;
-
- int_lock->lockee_count++;
- ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- return 0;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
+
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
+
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
+
+ loc_copy(&local->loc, loc);
+ local->xflag = xflag;
+
+ local->inode = inode_ref(loc->inode);
+ local->parent = inode_ref(loc->parent);
+
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
+
+ if (!local->xdata_req)
+ goto out;
+
+ local->op = GF_FOP_UNLINK;
+ local->transaction.wind = afr_unlink_wind;
+ local->transaction.unwind = afr_unlink_unwind;
+
+ ret = afr_build_parent_loc(&local->transaction.parent_loc, loc, &op_errno);
+ if (ret)
+ goto out;
+
+ local->transaction.main_frame = frame;
+ local->transaction.basename = AFR_BASENAME(loc->path);
+ ret = afr_transaction(transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ return 0;
out:
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- AFR_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
+ AFR_STACK_UNWIND(unlink, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
}
/* }}} */
/* {{{ rmdir */
-
-
int
-afr_rmdir_unwind (call_frame_t *frame, xlator_t *this)
+afr_rmdir_unwind(call_frame_t *frame, xlator_t *this)
{
- call_frame_t *main_frame = NULL;
- afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
- local = frame->local;
+ local = frame->local;
- main_frame = afr_transaction_detach_fop_frame (frame);
- if (!main_frame)
- return 0;
-
- AFR_STACK_UNWIND (rmdir, main_frame, local->op_ret, local->op_errno,
- &local->cont.dir_fop.preparent,
- &local->cont.dir_fop.postparent, local->xdata_rsp);
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
-}
+ AFR_STACK_UNWIND(rmdir, main_frame, local->op_ret, local->op_errno,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent, local->xdata_rsp);
+ return 0;
+}
int
-afr_rmdir_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+afr_rmdir_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- return __afr_dir_write_cbk (frame, cookie, this, op_ret, op_errno, NULL,
- preparent, postparent, NULL, NULL, xdata);
+ return __afr_dir_write_cbk(frame, cookie, this, op_ret, op_errno, NULL,
+ preparent, postparent, NULL, NULL, xdata);
}
-
int
-afr_rmdir_wind (call_frame_t *frame, xlator_t *this, int subvol)
+afr_rmdir_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- local = frame->local;
- priv = this->private;
+ local = frame->local;
+ priv = this->private;
- STACK_WIND_COOKIE (frame, afr_rmdir_wind_cbk, (void *) (long) subvol,
- priv->children[subvol],
- priv->children[subvol]->fops->rmdir,
- &local->loc, local->cont.rmdir.flags, local->xdata_req);
- return 0;
+ STACK_WIND_COOKIE(frame, afr_rmdir_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->rmdir, &local->loc,
+ local->cont.rmdir.flags, local->xdata_req);
+ return 0;
}
-
int
-afr_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
- dict_t *xdata)
+afr_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_internal_lock_t *int_lock = NULL;
- call_frame_t *transaction_frame = NULL;
- int ret = -1;
- int op_errno = ENOMEM;
- int nlockee = 0;
-
- priv = this->private;
-
- transaction_frame = copy_frame (frame);
- if (!transaction_frame)
- goto out;
-
- local = AFR_FRAME_INIT (transaction_frame, op_errno);
- if (!local)
- goto out;
-
-
- loc_copy (&local->loc, loc);
- local->inode = inode_ref (loc->inode);
- local->parent = inode_ref (loc->parent);
-
- local->cont.rmdir.flags = flags;
-
- if (xdata)
- local->xdata_req = dict_copy_with_ref (xdata, NULL);
- else
- local->xdata_req = dict_new ();
-
- if (!local->xdata_req)
- goto out;
-
- local->op = GF_FOP_RMDIR;
- local->transaction.wind = afr_rmdir_wind;
- local->transaction.fop = __afr_txn_write_fop;
- local->transaction.done = __afr_txn_write_done;
- local->transaction.unwind = afr_rmdir_unwind;
-
- ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
- &op_errno);
- if (ret)
- goto out;
-
- local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (loc->path);
- int_lock = &local->internal_lock;
-
- int_lock->lockee_count = nlockee = 0;
- ret = afr_init_entry_lockee (&int_lock->lockee[nlockee], local,
- &local->transaction.parent_loc,
- local->transaction.basename,
- priv->child_count);
- if (ret)
- goto out;
-
- nlockee++;
- ret = afr_init_entry_lockee (&int_lock->lockee[nlockee], local,
- &local->loc,
- NULL,
- priv->child_count);
- if (ret)
- goto out;
-
- nlockee++;
- qsort (int_lock->lockee, nlockee, sizeof (*int_lock->lockee),
- afr_entry_lockee_cmp);
- int_lock->lockee_count = nlockee;
-
- ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- return 0;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
+
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
+
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
+
+ loc_copy(&local->loc, loc);
+ local->inode = inode_ref(loc->inode);
+ local->parent = inode_ref(loc->parent);
+
+ local->cont.rmdir.flags = flags;
+
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
+
+ if (!local->xdata_req)
+ goto out;
+
+ local->op = GF_FOP_RMDIR;
+ local->transaction.wind = afr_rmdir_wind;
+ local->transaction.unwind = afr_rmdir_unwind;
+
+ ret = afr_build_parent_loc(&local->transaction.parent_loc, loc, &op_errno);
+ if (ret)
+ goto out;
+
+ local->transaction.main_frame = frame;
+ local->transaction.basename = AFR_BASENAME(loc->path);
+ ret = afr_transaction(transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ return 0;
out:
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- AFR_STACK_UNWIND (rmdir, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
+ AFR_STACK_UNWIND(rmdir, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
}
/* }}} */
diff --git a/xlators/cluster/afr/src/afr-dir-write.h b/xlators/cluster/afr/src/afr-dir-write.h
index 02f0a3682d9..1d88c3b9b26 100644
--- a/xlators/cluster/afr/src/afr-dir-write.h
+++ b/xlators/cluster/afr/src/afr-dir-write.h
@@ -12,36 +12,35 @@
#define __DIR_WRITE_H__
int32_t
-afr_create (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, mode_t mode,
- mode_t umask, fd_t *fd, dict_t *xdata);
+afr_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata);
int32_t
-afr_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t dev, mode_t umask, dict_t *xdata);
+afr_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t dev, mode_t umask, dict_t *xdata);
int32_t
-afr_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata);
+afr_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata);
int32_t
-afr_unlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int xflag, dict_t *xdata);
+afr_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata);
int32_t
-afr_rmdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int flags, dict_t *xdata);
+afr_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata);
int32_t
-afr_link (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc, dict_t *xdata);
+afr_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata);
int32_t
-afr_rename (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc, dict_t *xdata);
+afr_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata);
int
-afr_symlink (call_frame_t *frame, xlator_t *this,
- const char *linkpath, loc_t *oldloc, mode_t umask, dict_t *params);
+afr_symlink(call_frame_t *frame, xlator_t *this, const char *linkpath,
+ loc_t *oldloc, mode_t umask, dict_t *params);
#endif /* __DIR_WRITE_H__ */
diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c
index 7fed62a5361..c5521704de2 100644
--- a/xlators/cluster/afr/src/afr-inode-read.c
+++ b/xlators/cluster/afr/src/afr-inode-read.c
@@ -8,7 +8,6 @@
cases as published by the Free Software Foundation.
*/
-
#include <libgen.h>
#include <unistd.h>
#include <fnmatch.h>
@@ -16,28 +15,20 @@
#include <stdlib.h>
#include <signal.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
+#include <glusterfs/glusterfs.h>
#include "afr.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "list.h"
-#include "call-stub.h"
-#include "byte-order.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
-#include "quota-common-utils.h"
+#include <glusterfs/dict.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/list.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/quota-common-utils.h>
#include "afr-transaction.h"
+#include "afr-messages.h"
/*
* Quota size xattrs are not maintained by afr. There is a
@@ -49,146 +40,146 @@
* */
int
-afr_handle_quota_size (call_frame_t *frame, xlator_t *this)
+afr_handle_quota_size(call_frame_t *frame, xlator_t *this)
{
- unsigned char *readable = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- struct afr_reply *replies = NULL;
- int i = 0;
- int ret = 0;
- quota_meta_t size = {0, };
- quota_meta_t max_size = {0, };
- int readable_cnt = 0;
- int read_subvol = -1;
-
- local = frame->local;
- priv = this->private;
- replies = local->replies;
-
- readable = alloca0 (priv->child_count);
-
- afr_inode_read_subvol_get (local->inode, this, readable, 0, 0);
-
- readable_cnt = AFR_COUNT (readable, priv->child_count);
-
- for (i = 0; i < priv->child_count; i++) {
- if (!replies[i].valid || replies[i].op_ret == -1)
- continue;
- if (readable_cnt && !readable[i])
- continue;
- if (!replies[i].xdata)
- continue;
- ret = quota_dict_get_meta (replies[i].xdata, QUOTA_SIZE_KEY,
- &size);
- if (ret == -1)
- continue;
- if (read_subvol == -1)
- read_subvol = i;
- if (size.size > max_size.size ||
- (size.file_count + size.dir_count) >
- (max_size.file_count + max_size.dir_count))
- read_subvol = i;
-
- if (size.size > max_size.size)
- max_size.size = size.size;
- if (size.file_count > max_size.file_count)
- max_size.file_count = size.file_count;
- if (size.dir_count > max_size.dir_count)
- max_size.dir_count = size.dir_count;
- }
-
- if (max_size.size == 0 && max_size.file_count == 0 &&
- max_size.dir_count == 0)
- return read_subvol;
-
- for (i = 0; i < priv->child_count; i++) {
- if (!replies[i].valid || replies[i].op_ret == -1)
- continue;
- if (readable_cnt && !readable[i])
- continue;
- if (!replies[i].xdata)
- continue;
- quota_dict_set_meta (replies[i].xdata, QUOTA_SIZE_KEY,
- &max_size, IA_IFDIR);
- }
-
+ unsigned char *readable = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ struct afr_reply *replies = NULL;
+ int i = 0;
+ int ret = 0;
+ quota_meta_t size = {
+ 0,
+ };
+ quota_meta_t max_size = {
+ 0,
+ };
+ int readable_cnt = 0;
+ int read_subvol = -1;
+
+ local = frame->local;
+ priv = this->private;
+ replies = local->replies;
+
+ readable = alloca0(priv->child_count);
+
+ afr_inode_read_subvol_get(local->inode, this, readable, 0, 0);
+
+ readable_cnt = AFR_COUNT(readable, priv->child_count);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret == -1)
+ continue;
+ if (readable_cnt && !readable[i])
+ continue;
+ if (!replies[i].xdata)
+ continue;
+ ret = quota_dict_get_meta(replies[i].xdata, QUOTA_SIZE_KEY,
+ SLEN(QUOTA_SIZE_KEY), &size);
+ if (ret == -1)
+ continue;
+ if (read_subvol == -1)
+ read_subvol = i;
+ if (size.size > max_size.size ||
+ (size.file_count + size.dir_count) >
+ (max_size.file_count + max_size.dir_count))
+ read_subvol = i;
+
+ if (size.size > max_size.size)
+ max_size.size = size.size;
+ if (size.file_count > max_size.file_count)
+ max_size.file_count = size.file_count;
+ if (size.dir_count > max_size.dir_count)
+ max_size.dir_count = size.dir_count;
+ }
+
+ if (max_size.size == 0 && max_size.file_count == 0 &&
+ max_size.dir_count == 0)
return read_subvol;
-}
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret == -1)
+ continue;
+ if (readable_cnt && !readable[i])
+ continue;
+ if (!replies[i].xdata)
+ continue;
+ quota_dict_set_meta(replies[i].xdata, QUOTA_SIZE_KEY, &max_size,
+ IA_IFDIR);
+ }
+
+ return read_subvol;
+}
/* {{{ access */
int
-afr_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xdata)
+afr_access_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, dict_t *xdata)
{
- afr_local_t *local = NULL;
+ afr_local_t *local = NULL;
- local = frame->local;
+ local = frame->local;
- if (op_ret < 0) {
- local->op_ret = op_ret;
- local->op_errno = op_errno;
+ if (op_ret < 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
- afr_read_txn_continue (frame, this, (long) cookie);
- return 0;
- }
+ afr_read_txn_continue(frame, this, (long)cookie);
+ return 0;
+ }
- AFR_STACK_UNWIND (access, frame, op_ret, op_errno, xdata);
+ AFR_STACK_UNWIND(access, frame, op_ret, op_errno, xdata);
- return 0;
+ return 0;
}
-
int
-afr_access_wind (call_frame_t *frame, xlator_t *this, int subvol)
+afr_access_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
-
- priv = this->private;
- local = frame->local;
-
- if (subvol == -1) {
- AFR_STACK_UNWIND (access, frame, local->op_ret,
- local->op_errno, 0);
- return 0;
- }
-
- STACK_WIND_COOKIE (frame, afr_access_cbk, (void *) (long) subvol,
- priv->children[subvol],
- priv->children[subvol]->fops->access,
- &local->loc, local->cont.access.mask,
- local->xdata_req);
- return 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ if (subvol == -1) {
+ AFR_STACK_UNWIND(access, frame, local->op_ret, local->op_errno, 0);
+ return 0;
+ }
+
+ STACK_WIND_COOKIE(frame, afr_access_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->access, &local->loc,
+ local->cont.access.mask, local->xdata_req);
+ return 0;
}
int
-afr_access (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int mask, dict_t *xdata)
+afr_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int mask,
+ dict_t *xdata)
{
- afr_local_t *local = NULL;
- int op_errno = 0;
+ afr_local_t *local = NULL;
+ int op_errno = 0;
- local = AFR_FRAME_INIT (frame, op_errno);
- if (!local)
- goto out;
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
- local->op = GF_FOP_ACCESS;
- loc_copy (&local->loc, loc);
- local->cont.access.mask = mask;
- if (xdata)
- local->xdata_req = dict_ref (xdata);
+ local->op = GF_FOP_ACCESS;
+ loc_copy(&local->loc, loc);
+ local->cont.access.mask = mask;
+ if (xdata)
+ local->xdata_req = dict_ref(xdata);
- afr_read_txn (frame, this, loc->inode, afr_access_wind,
- AFR_METADATA_TRANSACTION);
+ afr_read_txn(frame, this, loc->inode, afr_access_wind,
+ AFR_METADATA_TRANSACTION);
- return 0;
+ return 0;
out:
- AFR_STACK_UNWIND (access, frame, -1, op_errno, NULL);
+ AFR_STACK_UNWIND(access, frame, -1, op_errno, NULL);
- return 0;
+ return 0;
}
/* }}} */
@@ -196,152 +187,140 @@ out:
/* {{{ stat */
int
-afr_stat_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct iatt *buf, dict_t *xdata)
+afr_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
- afr_local_t *local = NULL;
+ afr_local_t *local = NULL;
- local = frame->local;
+ local = frame->local;
- if (op_ret < 0) {
- local->op_ret = op_ret;
- local->op_errno = op_errno;
+ if (op_ret < 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
- afr_read_txn_continue (frame, this, (long) cookie);
- return 0;
- }
+ afr_read_txn_continue(frame, this, (long)cookie);
+ return 0;
+ }
- AFR_STACK_UNWIND (stat, frame, op_ret, op_errno, buf, xdata);
+ AFR_STACK_UNWIND(stat, frame, op_ret, op_errno, buf, xdata);
- return 0;
+ return 0;
}
-
int
-afr_stat_wind (call_frame_t *frame, xlator_t *this, int subvol)
+afr_stat_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
-
- priv = this->private;
- local = frame->local;
-
- if (subvol == -1) {
- AFR_STACK_UNWIND (stat, frame, local->op_ret, local->op_errno,
- 0, 0);
- return 0;
- }
-
- STACK_WIND_COOKIE (frame, afr_stat_cbk, (void *) (long) subvol,
- priv->children[subvol],
- priv->children[subvol]->fops->stat,
- &local->loc, local->xdata_req);
- return 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ if (subvol == -1) {
+ AFR_STACK_UNWIND(stat, frame, local->op_ret, local->op_errno, 0, 0);
+ return 0;
+ }
+
+ STACK_WIND_COOKIE(
+ frame, afr_stat_cbk, (void *)(long)subvol, priv->children[subvol],
+ priv->children[subvol]->fops->stat, &local->loc, local->xdata_req);
+ return 0;
}
int
-afr_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+afr_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- afr_local_t *local = NULL;
- int op_errno = 0;
+ afr_local_t *local = NULL;
+ int op_errno = 0;
- local = AFR_FRAME_INIT (frame, op_errno);
- if (!local)
- goto out;
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
- local->op = GF_FOP_STAT;
- loc_copy (&local->loc, loc);
- if (xdata)
- local->xdata_req = dict_ref (xdata);
+ local->op = GF_FOP_STAT;
+ loc_copy(&local->loc, loc);
+ if (xdata)
+ local->xdata_req = dict_ref(xdata);
- afr_read_txn (frame, this, loc->inode, afr_stat_wind,
- AFR_DATA_TRANSACTION);
+ afr_read_txn(frame, this, loc->inode, afr_stat_wind, AFR_DATA_TRANSACTION);
- return 0;
+ return 0;
out:
- AFR_STACK_UNWIND (stat, frame, -1, op_errno, NULL, NULL);
+ AFR_STACK_UNWIND(stat, frame, -1, op_errno, NULL, NULL);
- return 0;
+ return 0;
}
-
/* }}} */
/* {{{ fstat */
int
-afr_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- dict_t *xdata)
+afr_fstat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
- afr_local_t *local = NULL;
+ afr_local_t *local = NULL;
- local = frame->local;
+ local = frame->local;
- if (op_ret < 0) {
- local->op_ret = op_ret;
- local->op_errno = op_errno;
+ if (op_ret < 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
- afr_read_txn_continue (frame, this, (long) cookie);
- return 0;
- }
+ afr_read_txn_continue(frame, this, (long)cookie);
+ return 0;
+ }
- AFR_STACK_UNWIND (fstat, frame, op_ret, op_errno, buf, xdata);
+ AFR_STACK_UNWIND(fstat, frame, op_ret, op_errno, buf, xdata);
- return 0;
+ return 0;
}
-
int
-afr_fstat_wind (call_frame_t *frame, xlator_t *this, int subvol)
+afr_fstat_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
-
- priv = this->private;
- local = frame->local;
-
- if (subvol == -1) {
- AFR_STACK_UNWIND (fstat, frame, local->op_ret, local->op_errno,
- 0, 0);
- return 0;
- }
-
- STACK_WIND_COOKIE (frame, afr_fstat_cbk, (void *) (long) subvol,
- priv->children[subvol],
- priv->children[subvol]->fops->fstat,
- local->fd, local->xdata_req);
- return 0;
-}
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ priv = this->private;
+ local = frame->local;
+
+ if (subvol == -1) {
+ AFR_STACK_UNWIND(fstat, frame, local->op_ret, local->op_errno, 0, 0);
+ return 0;
+ }
+
+ STACK_WIND_COOKIE(
+ frame, afr_fstat_cbk, (void *)(long)subvol, priv->children[subvol],
+ priv->children[subvol]->fops->fstat, local->fd, local->xdata_req);
+ return 0;
+}
int32_t
-afr_fstat (call_frame_t *frame, xlator_t *this,
- fd_t *fd, dict_t *xdata)
+afr_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- afr_local_t *local = NULL;
- int op_errno = 0;
+ afr_local_t *local = NULL;
+ int op_errno = 0;
- local = AFR_FRAME_INIT (frame, op_errno);
- if (!local)
- goto out;
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
- local->op = GF_FOP_FSTAT;
- local->fd = fd_ref (fd);
- if (xdata)
- local->xdata_req = dict_ref (xdata);
+ local->op = GF_FOP_FSTAT;
+ local->fd = fd_ref(fd);
+ if (xdata)
+ local->xdata_req = dict_ref(xdata);
- afr_fix_open (fd, this);
+ afr_fix_open(fd, this);
- afr_read_txn (frame, this, fd->inode, afr_fstat_wind,
- AFR_DATA_TRANSACTION);
+ afr_read_txn(frame, this, fd->inode, afr_fstat_wind, AFR_DATA_TRANSACTION);
- return 0;
+ return 0;
out:
- AFR_STACK_UNWIND (fstat, frame, -1, op_errno, NULL, NULL);
+ AFR_STACK_UNWIND(fstat, frame, -1, op_errno, NULL, NULL);
- return 0;
+ return 0;
}
/* }}} */
@@ -349,1394 +328,1567 @@ out:
/* {{{ readlink */
int
-afr_readlink_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- const char *buf, struct iatt *sbuf, dict_t *xdata)
+afr_readlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, const char *buf,
+ struct iatt *sbuf, dict_t *xdata)
{
- afr_local_t *local = NULL;
+ afr_local_t *local = NULL;
- local = frame->local;
+ local = frame->local;
- if (op_ret < 0) {
- local->op_ret = -1;
- local->op_errno = op_errno;
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
- afr_read_txn_continue (frame, this, (long) cookie);
- return 0;
- }
+ afr_read_txn_continue(frame, this, (long)cookie);
+ return 0;
+ }
- AFR_STACK_UNWIND (readlink, frame, op_ret, op_errno,
- buf, sbuf, xdata);
- return 0;
+ AFR_STACK_UNWIND(readlink, frame, op_ret, op_errno, buf, sbuf, xdata);
+ return 0;
}
int
-afr_readlink_wind (call_frame_t *frame, xlator_t *this, int subvol)
+afr_readlink_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- local = frame->local;
- priv = this->private;
-
- if (subvol == -1) {
- AFR_STACK_UNWIND (readlink, frame, local->op_ret,
- local->op_errno, 0, 0, 0);
- return 0;
- }
-
- STACK_WIND_COOKIE (frame, afr_readlink_cbk, (void *) (long) subvol,
- priv->children[subvol],
- priv->children[subvol]->fops->readlink,
- &local->loc, local->cont.readlink.size,
- local->xdata_req);
- return 0;
-}
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ local = frame->local;
+ priv = this->private;
+
+ if (subvol == -1) {
+ AFR_STACK_UNWIND(readlink, frame, local->op_ret, local->op_errno, 0, 0,
+ 0);
+ return 0;
+ }
+
+ STACK_WIND_COOKIE(frame, afr_readlink_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->readlink, &local->loc,
+ local->cont.readlink.size, local->xdata_req);
+ return 0;
+}
int
-afr_readlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc, size_t size, dict_t *xdata)
+afr_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+ dict_t *xdata)
{
- afr_local_t * local = NULL;
- int32_t op_errno = 0;
+ afr_local_t *local = NULL;
+ int32_t op_errno = 0;
- local = AFR_FRAME_INIT (frame, op_errno);
- if (!local)
- goto out;
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
- local->op = GF_FOP_READLINK;
- loc_copy (&local->loc, loc);
- local->cont.readlink.size = size;
- if (xdata)
- local->xdata_req = dict_ref (xdata);
+ local->op = GF_FOP_READLINK;
+ loc_copy(&local->loc, loc);
+ local->cont.readlink.size = size;
+ if (xdata)
+ local->xdata_req = dict_ref(xdata);
- afr_read_txn (frame, this, loc->inode, afr_readlink_wind,
- AFR_DATA_TRANSACTION);
+ afr_read_txn(frame, this, loc->inode, afr_readlink_wind,
+ AFR_DATA_TRANSACTION);
- return 0;
+ return 0;
out:
- AFR_STACK_UNWIND(readlink, frame, -1, op_errno, 0, 0, 0);
+ AFR_STACK_UNWIND(readlink, frame, -1, op_errno, 0, 0, 0);
- return 0;
+ return 0;
}
-
/* }}} */
/* {{{ getxattr */
struct _xattr_key {
- char *key;
- struct list_head list;
+ char *key;
+ struct list_head list;
};
-
int
-__gather_xattr_keys (dict_t *dict, char *key, data_t *value,
- void *data)
+__gather_xattr_keys(dict_t *dict, char *key, data_t *value, void *data)
{
- struct list_head * list = data;
- struct _xattr_key * xkey = NULL;
+ struct list_head *list = data;
+ struct _xattr_key *xkey = NULL;
- if (!strncmp (key, AFR_XATTR_PREFIX,
- strlen (AFR_XATTR_PREFIX))) {
+ if (!strncmp(key, AFR_XATTR_PREFIX, SLEN(AFR_XATTR_PREFIX))) {
+ xkey = GF_MALLOC(sizeof(*xkey), gf_afr_mt_xattr_key);
+ if (!xkey)
+ return -1;
- xkey = GF_CALLOC (1, sizeof (*xkey), gf_afr_mt_xattr_key);
- if (!xkey)
- return -1;
+ xkey->key = key;
+ INIT_LIST_HEAD(&xkey->list);
- xkey->key = key;
- INIT_LIST_HEAD (&xkey->list);
-
- list_add_tail (&xkey->list, list);
- }
- return 0;
+ list_add_tail(&xkey->list, list);
+ }
+ return 0;
}
-
void
-afr_filter_xattrs (dict_t *dict)
+afr_filter_xattrs(dict_t *dict)
{
- struct list_head keys = {0,};
- struct _xattr_key *key = NULL;
- struct _xattr_key *tmp = NULL;
+ struct list_head keys = {
+ 0,
+ };
+ struct _xattr_key *key = NULL;
+ struct _xattr_key *tmp = NULL;
- INIT_LIST_HEAD (&keys);
+ INIT_LIST_HEAD(&keys);
- dict_foreach (dict, __gather_xattr_keys,
- (void *) &keys);
+ dict_foreach(dict, __gather_xattr_keys, (void *)&keys);
- list_for_each_entry_safe (key, tmp, &keys, list) {
- dict_del (dict, key->key);
+ list_for_each_entry_safe(key, tmp, &keys, list)
+ {
+ dict_del(dict, key->key);
- list_del_init (&key->list);
+ list_del_init(&key->list);
- GF_FREE (key);
- }
+ GF_FREE(key);
+ }
}
+static gf_boolean_t
+afr_getxattr_ignorable_errnos(int32_t op_errno)
+{
+ if (op_errno == ENODATA || op_errno == ENOTSUP || op_errno == ERANGE ||
+ op_errno == ENAMETOOLONG)
+ return _gf_true;
+ return _gf_false;
+}
int
-afr_getxattr_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *dict, dict_t *xdata)
+afr_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
- afr_local_t *local = NULL;
+ afr_local_t *local = NULL;
- local = frame->local;
+ local = frame->local;
- if (op_ret < 0) {
- local->op_ret = op_ret;
- local->op_errno = op_errno;
+ if (op_ret < 0 && !afr_getxattr_ignorable_errnos(op_errno)) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
- afr_read_txn_continue (frame, this, (long) cookie);
- return 0;
- }
+ afr_read_txn_continue(frame, this, (long)cookie);
+ return 0;
+ }
- if (dict)
- afr_filter_xattrs (dict);
+ if (dict)
+ afr_filter_xattrs(dict);
- AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, xdata);
+ AFR_STACK_UNWIND(getxattr, frame, op_ret, op_errno, dict, xdata);
- return 0;
+ return 0;
}
-
int
-afr_getxattr_wind (call_frame_t *frame, xlator_t *this, int subvol)
+afr_getxattr_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- local = frame->local;
- priv = this->private;
-
- if (subvol == -1) {
- AFR_STACK_UNWIND (getxattr, frame, local->op_ret,
- local->op_errno, NULL, NULL);
- return 0;
- }
-
- STACK_WIND_COOKIE (frame, afr_getxattr_cbk, (void *) (long) subvol,
- priv->children[subvol],
- priv->children[subvol]->fops->getxattr,
- &local->loc, local->cont.getxattr.name,
- local->xdata_req);
- return 0;
-}
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ local = frame->local;
+ priv = this->private;
+
+ if (subvol == -1) {
+ AFR_STACK_UNWIND(getxattr, frame, local->op_ret, local->op_errno, NULL,
+ NULL);
+ return 0;
+ }
+
+ STACK_WIND_COOKIE(frame, afr_getxattr_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->getxattr, &local->loc,
+ local->cont.getxattr.name, local->xdata_req);
+ return 0;
+}
int32_t
-afr_getxattr_unwind (call_frame_t *frame, int op_ret, int op_errno,
- dict_t *dict, dict_t *xdata)
+afr_getxattr_unwind(call_frame_t *frame, int op_ret, int op_errno, dict_t *dict,
+ dict_t *xdata)
{
- AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, xdata);
- return 0;
+ AFR_STACK_UNWIND(getxattr, frame, op_ret, op_errno, dict, xdata);
+ return 0;
}
int32_t
-afr_fgetxattr_clrlk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *dict, dict_t *xdata)
+afr_fgetxattr_clrlk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- xlator_t **children = NULL;
- dict_t *xattr = NULL;
- char *tmp_report = NULL;
- char lk_summary[1024] = {0,};
- int serz_len = 0;
- int32_t callcnt = 0;
- long int cky = 0;
- int ret = 0;
-
- priv = this->private;
- children = priv->children;
-
- local = frame->local;
- cky = (long) cookie;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if (op_ret == -1)
- local->replies[cky].op_errno = op_errno;
-
- if (!local->dict)
- local->dict = dict_new ();
- if (local->dict) {
- ret = dict_get_str (dict, local->cont.getxattr.name,
- &tmp_report);
- if (ret)
- goto unlock;
- ret = dict_set_dynstr (local->dict,
- children[cky]->name,
- gf_strdup (tmp_report));
- if (ret)
- goto unlock;
- }
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ xlator_t **children = NULL;
+ dict_t *xattr = NULL;
+ char *tmp_report = NULL;
+ char lk_summary[1024] = {
+ 0,
+ };
+ int serz_len = 0;
+ int32_t callcnt = 0;
+ long int cky = 0;
+ int ret = 0;
+ int keylen = 0;
+ int children_keylen = 0;
+
+ priv = this->private;
+ children = priv->children;
+
+ local = frame->local;
+ cky = (long)cookie;
+ keylen = strlen(local->cont.getxattr.name);
+ children_keylen = strlen(children[cky]->name);
+
+ LOCK(&frame->lock);
+ {
+ callcnt = --local->call_count;
+ if (op_ret == -1)
+ local->replies[cky].op_errno = op_errno;
+
+ if (!local->dict)
+ local->dict = dict_new();
+ if (local->dict) {
+ ret = dict_get_strn(dict, local->cont.getxattr.name, keylen,
+ &tmp_report);
+ if (ret)
+ goto unlock;
+ ret = dict_set_dynstrn(local->dict, children[cky]->name,
+ children_keylen, gf_strdup(tmp_report));
+ if (ret)
+ goto unlock;
}
+ }
unlock:
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- xattr = dict_new ();
- if (!xattr) {
- op_ret = -1;
- op_errno = ENOMEM;
- goto unwind;
- }
- ret = dict_serialize_value_with_delim (local->dict,
- lk_summary,
- &serz_len, '\n');
- if (ret) {
- op_ret = -1;
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Error serializing dictionary");
- goto unwind;
- }
- if (serz_len == -1)
- snprintf (lk_summary, sizeof (lk_summary),
- "No locks cleared.");
- ret = dict_set_dynstr (xattr, local->cont.getxattr.name,
- gf_strdup (lk_summary));
- if (ret) {
- op_ret = -1;
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Error setting dictionary");
- goto unwind;
- }
+ UNLOCK(&frame->lock);
+
+ if (!callcnt) {
+ xattr = dict_new();
+ if (!xattr) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+ ret = dict_serialize_value_with_delim(local->dict, lk_summary,
+ &serz_len, '\n');
+ if (ret) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+ if (serz_len == -1)
+ snprintf(lk_summary, sizeof(lk_summary), "No locks cleared.");
+ ret = dict_set_dynstrn(xattr, local->cont.getxattr.name, keylen,
+ gf_strdup(lk_summary));
+ if (ret) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_DICT_SET_FAILED,
+ "Error setting dictionary");
+ goto unwind;
+ }
- unwind:
- // Updating child_errno with more recent 'events'
- op_errno = afr_final_errno (local, priv);
+ op_errno = afr_final_errno(local, priv);
- AFR_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, xattr,
- xdata);
- if (xattr)
- dict_unref (xattr);
- }
+ unwind:
+ AFR_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, xattr, xdata);
+ if (xattr)
+ dict_unref(xattr);
+ }
- return ret;
+ return ret;
}
int32_t
-afr_getxattr_clrlk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *dict, dict_t *xdata)
+afr_getxattr_clrlk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- xlator_t **children = NULL;
- dict_t *xattr = NULL;
- char *tmp_report = NULL;
- char lk_summary[1024] = {0,};
- int serz_len = 0;
- int32_t callcnt = 0;
- long int cky = 0;
- int ret = 0;
-
- priv = this->private;
- children = priv->children;
-
- local = frame->local;
- cky = (long) cookie;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if (op_ret == -1)
- local->replies[cky].op_errno = op_errno;
-
- if (!local->dict)
- local->dict = dict_new ();
- if (local->dict) {
- ret = dict_get_str (dict, local->cont.getxattr.name,
- &tmp_report);
- if (ret)
- goto unlock;
- ret = dict_set_dynstr (local->dict,
- children[cky]->name,
- gf_strdup (tmp_report));
- if (ret)
- goto unlock;
- }
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ xlator_t **children = NULL;
+ dict_t *xattr = NULL;
+ char *tmp_report = NULL;
+ char lk_summary[1024] = {
+ 0,
+ };
+ int serz_len = 0;
+ int32_t callcnt = 0;
+ long int cky = 0;
+ int ret = 0;
+ int keylen = 0;
+ int children_keylen = 0;
+
+ priv = this->private;
+ children = priv->children;
+
+ local = frame->local;
+ cky = (long)cookie;
+
+ keylen = strlen(local->cont.getxattr.name);
+ children_keylen = strlen(children[cky]->name);
+
+ LOCK(&frame->lock);
+ {
+ callcnt = --local->call_count;
+ if (op_ret == -1)
+ local->replies[cky].op_errno = op_errno;
+
+ if (!local->dict)
+ local->dict = dict_new();
+ if (local->dict) {
+ ret = dict_get_strn(dict, local->cont.getxattr.name, keylen,
+ &tmp_report);
+ if (ret)
+ goto unlock;
+ ret = dict_set_dynstrn(local->dict, children[cky]->name,
+ children_keylen, gf_strdup(tmp_report));
+ if (ret)
+ goto unlock;
}
+ }
unlock:
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- xattr = dict_new ();
- if (!xattr) {
- op_ret = -1;
- op_errno = ENOMEM;
- goto unwind;
- }
- ret = dict_serialize_value_with_delim (local->dict,
- lk_summary,
- &serz_len, '\n');
- if (ret) {
- op_ret = -1;
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Error serializing dictionary");
- goto unwind;
- }
- if (serz_len == -1)
- snprintf (lk_summary, sizeof (lk_summary),
- "No locks cleared.");
- ret = dict_set_dynstr (xattr, local->cont.getxattr.name,
- gf_strdup (lk_summary));
- if (ret) {
- op_ret = -1;
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Error setting dictionary");
- goto unwind;
- }
+ UNLOCK(&frame->lock);
+
+ if (!callcnt) {
+ xattr = dict_new();
+ if (!xattr) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+ ret = dict_serialize_value_with_delim(local->dict, lk_summary,
+ &serz_len, '\n');
+ if (ret) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+ if (serz_len == -1)
+ snprintf(lk_summary, sizeof(lk_summary), "No locks cleared.");
+ ret = dict_set_dynstrn(xattr, local->cont.getxattr.name, keylen,
+ gf_strdup(lk_summary));
+ if (ret) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_DICT_SET_FAILED,
+ "Error setting dictionary");
+ goto unwind;
+ }
- unwind:
- // Updating child_errno with more recent 'events'
- op_errno = afr_final_errno (local, priv);
+ op_errno = afr_final_errno(local, priv);
- AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, xattr, xdata);
+ unwind:
+ AFR_STACK_UNWIND(getxattr, frame, op_ret, op_errno, xattr, xdata);
- if (xattr)
- dict_unref (xattr);
- }
+ if (xattr)
+ dict_unref(xattr);
+ }
- return ret;
+ return ret;
}
/**
* node-uuid cbk uses next child querying mechanism
*/
int32_t
-afr_getxattr_node_uuid_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *dict, dict_t *xdata)
+afr_getxattr_node_uuid_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- xlator_t **children = NULL;
- int unwind = 1;
- int curr_call_child = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ xlator_t **children = NULL;
+ int unwind = 1;
+ int curr_call_child = 0;
- priv = this->private;
- children = priv->children;
+ priv = this->private;
+ children = priv->children;
- local = frame->local;
+ local = frame->local;
- if (op_ret == -1) { /** query the _next_ child */
-
- /**
- * _current_ becomes _next_
- * If done with all childs and yet no success; give up !
- */
- curr_call_child = (int) ((long)cookie);
- if (++curr_call_child == priv->child_count)
- goto unwind;
-
- gf_log (this->name, GF_LOG_WARNING,
- "op_ret (-1): Re-querying afr-child (%d/%d)",
- curr_call_child, priv->child_count);
-
- unwind = 0;
- STACK_WIND_COOKIE (frame, afr_getxattr_node_uuid_cbk,
- (void *) (long) curr_call_child,
- children[curr_call_child],
- children[curr_call_child]->fops->getxattr,
- &local->loc,
- local->cont.getxattr.name,
- NULL);
+ if (op_ret == -1) { /** query the _next_ child */
+
+ /**
+ * _current_ becomes _next_
+ * If done with all children and yet no success; give up !
+ */
+ curr_call_child = (int)((long)cookie);
+ if (++curr_call_child == priv->child_count)
+ goto unwind;
+
+ gf_msg_debug(this->name, op_errno,
+ "op_ret (-1): Re-querying afr-child (%d/%d)",
+ curr_call_child, priv->child_count);
+
+ unwind = 0;
+ STACK_WIND_COOKIE(
+ frame, afr_getxattr_node_uuid_cbk, (void *)(long)curr_call_child,
+ children[curr_call_child],
+ children[curr_call_child]->fops->getxattr, &local->loc,
+ local->cont.getxattr.name, local->xdata_req);
+ }
+
+unwind:
+ if (unwind)
+ AFR_STACK_UNWIND(getxattr, frame, op_ret, op_errno, dict, xdata);
+
+ return 0;
+}
+
+/**
+ * list-node-uuids cbk returns the list of node_uuids for the subvolume.
+ */
+int32_t
+afr_getxattr_list_node_uuids_cbk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int32_t callcnt = 0;
+ int ret = 0;
+ char *xattr_serz = NULL;
+ long cky = 0;
+ int32_t tlen = 0;
+
+ local = frame->local;
+ priv = this->private;
+ cky = (long)cookie;
+
+ LOCK(&frame->lock);
+ {
+ callcnt = --local->call_count;
+ local->replies[cky].valid = 1;
+ local->replies[cky].op_ret = op_ret;
+ local->replies[cky].op_errno = op_errno;
+
+ if (op_ret < 0)
+ goto unlock;
+
+ local->op_ret = 0;
+
+ if (!local->xdata_rsp && xdata)
+ local->xdata_rsp = dict_ref(xdata);
+ local->replies[cky].xattr = dict_ref(dict);
+ }
+
+unlock:
+ UNLOCK(&frame->lock);
+
+ if (!callcnt) {
+ if (local->op_ret != 0) {
+ /* All bricks gave an error. */
+ local->op_errno = afr_final_errno(local, priv);
+ goto unwind;
}
- unwind:
- if (unwind)
- AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict,
- NULL);
+ /*Since we store the UUID0_STR as node uuid for down bricks and
+ *for non zero op_ret, assigning length to priv->child_count
+ *number of uuids*/
+ local->cont.getxattr.xattr_len = (SLEN(UUID0_STR) + 2) *
+ priv->child_count;
+
+ if (!local->dict)
+ local->dict = dict_new();
+ if (!local->dict) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unwind;
+ }
- return 0;
+ xattr_serz = GF_CALLOC(local->cont.getxattr.xattr_len, sizeof(char),
+ gf_common_mt_char);
+
+ if (!xattr_serz) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ ret = afr_serialize_xattrs_with_delimiter(frame, this, xattr_serz,
+ UUID0_STR, &tlen, ' ');
+ if (ret) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ GF_FREE(xattr_serz);
+ goto unwind;
+ }
+ ret = dict_set_dynstr_sizen(local->dict, GF_XATTR_LIST_NODE_UUIDS_KEY,
+ xattr_serz);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Cannot set node_uuid key in dict");
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ if (ret == -EINVAL)
+ GF_FREE(xattr_serz);
+ } else {
+ local->op_ret = local->cont.getxattr.xattr_len - 1;
+ local->op_errno = 0;
+ }
+
+ unwind:
+ AFR_STACK_UNWIND(getxattr, frame, local->op_ret, local->op_errno,
+ local->dict, local->xdata_rsp);
+ }
+
+ return ret;
}
int32_t
-afr_getxattr_quota_size_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *dict, dict_t *xdata)
+afr_getxattr_quota_size_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
- int idx = (long) cookie;
- int call_count = 0;
- afr_local_t *local = frame->local;
- int read_subvol = -1;
-
- local->replies[idx].valid = 1;
- local->replies[idx].op_ret = op_ret;
- local->replies[idx].op_errno = op_errno;
- if (dict)
- local->replies[idx].xdata = dict_ref (dict);
- call_count = afr_frame_return (frame);
- if (call_count == 0) {
- local->inode = inode_ref (local->loc.inode);
- read_subvol = afr_handle_quota_size (frame, this);
- if (read_subvol != -1) {
- op_ret = local->replies[read_subvol].op_ret;
- op_errno = local->replies[read_subvol].op_errno;
- dict = local->replies[read_subvol].xdata;
- }
- AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict,
- xdata);
+ int idx = (long)cookie;
+ int call_count = 0;
+ afr_local_t *local = frame->local;
+ int read_subvol = -1;
+
+ local->replies[idx].valid = 1;
+ local->replies[idx].op_ret = op_ret;
+ local->replies[idx].op_errno = op_errno;
+ if (dict)
+ local->replies[idx].xdata = dict_ref(dict);
+ call_count = afr_frame_return(frame);
+ if (call_count == 0) {
+ local->inode = inode_ref(local->loc.inode);
+ read_subvol = afr_handle_quota_size(frame, this);
+ if (read_subvol != -1) {
+ op_ret = local->replies[read_subvol].op_ret;
+ op_errno = local->replies[read_subvol].op_errno;
+ dict = local->replies[read_subvol].xdata;
}
+ AFR_STACK_UNWIND(getxattr, frame, op_ret, op_errno, dict, xdata);
+ }
- return 0;
+ return 0;
}
int32_t
-afr_getxattr_lockinfo_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *dict, dict_t *xdata)
+afr_getxattr_lockinfo_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
- int call_cnt = 0, len = 0;
- char *lockinfo_buf = NULL;
- dict_t *lockinfo = NULL, *newdict = NULL;
- afr_local_t *local = NULL;
-
- LOCK (&frame->lock);
- {
- local = frame->local;
+ int call_cnt = 0, len = 0;
+ char *lockinfo_buf = NULL;
+ dict_t *lockinfo = NULL, *newdict = NULL;
+ afr_local_t *local = NULL;
- call_cnt = --local->call_count;
+ LOCK(&frame->lock);
+ {
+ local = frame->local;
- if ((op_ret < 0) || (!dict && !xdata)) {
- goto unlock;
- }
+ call_cnt = --local->call_count;
- if (xdata) {
- if (!local->xdata_rsp) {
- local->xdata_rsp = dict_new ();
- if (!local->xdata_rsp) {
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- goto unlock;
- }
- }
- }
+ if ((op_ret < 0) || (!dict && !xdata)) {
+ goto unlock;
+ }
- if (!dict) {
- goto unlock;
+ if (xdata) {
+ if (!local->xdata_rsp) {
+ local->xdata_rsp = dict_new();
+ if (!local->xdata_rsp) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unlock;
}
+ }
+ }
- op_ret = dict_get_ptr_and_len (dict, GF_XATTR_LOCKINFO_KEY,
- (void **)&lockinfo_buf, &len);
+ if (!dict) {
+ goto unlock;
+ }
- if (!lockinfo_buf) {
- goto unlock;
- }
+ op_ret = dict_get_ptr_and_len(dict, GF_XATTR_LOCKINFO_KEY,
+ (void **)&lockinfo_buf, &len);
- if (!local->dict) {
- local->dict = dict_new ();
- if (!local->dict) {
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- goto unlock;
- }
- }
- }
-unlock:
- UNLOCK (&frame->lock);
-
- if (lockinfo_buf != NULL) {
- lockinfo = dict_new ();
- if (lockinfo == NULL) {
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- } else {
- op_ret = dict_unserialize (lockinfo_buf, len,
- &lockinfo);
-
- if (lockinfo && local->dict) {
- dict_copy (lockinfo, local->dict);
- }
- }
+ if (!lockinfo_buf) {
+ goto unlock;
}
- if (xdata && local->xdata_rsp) {
- dict_copy (xdata, local->xdata_rsp);
+ if (!local->dict) {
+ local->dict = dict_new();
+ if (!local->dict) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unlock;
+ }
}
+ }
+unlock:
+ UNLOCK(&frame->lock);
- if (!call_cnt) {
- newdict = dict_new ();
- if (!newdict) {
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- goto unwind;
- }
-
- len = dict_serialized_length (local->dict);
- if (len <= 0) {
- goto unwind;
- }
-
- lockinfo_buf = GF_CALLOC (1, len, gf_common_mt_char);
- if (!lockinfo_buf) {
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- goto unwind;
- }
+ if (lockinfo_buf != NULL) {
+ lockinfo = dict_new();
+ if (lockinfo == NULL) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ } else {
+ op_ret = dict_unserialize(lockinfo_buf, len, &lockinfo);
- op_ret = dict_serialize (local->dict, lockinfo_buf);
- if (op_ret < 0) {
- local->op_ret = -1;
- local->op_errno = -op_ret;
- }
+ if (lockinfo && local->dict) {
+ dict_copy(lockinfo, local->dict);
+ }
+ }
+ }
+
+ if (xdata && local->xdata_rsp) {
+ dict_copy(xdata, local->xdata_rsp);
+ }
+
+ if (!call_cnt) {
+ newdict = dict_new();
+ if (!newdict) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unwind;
+ }
- op_ret = dict_set_dynptr (newdict, GF_XATTR_LOCKINFO_KEY,
- (void *)lockinfo_buf, len);
- if (op_ret < 0) {
- local->op_ret = -1;
- local->op_errno = -op_ret;
- goto unwind;
- }
+ op_ret = dict_allocate_and_serialize(
+ local->dict, (char **)&lockinfo_buf, (unsigned int *)&len);
+ if (op_ret != 0) {
+ local->op_ret = -1;
+ goto unwind;
+ }
- unwind:
- AFR_STACK_UNWIND (getxattr, frame, op_ret,
- op_errno, newdict,
- local->xdata_rsp);
+ op_ret = dict_set_dynptr(newdict, GF_XATTR_LOCKINFO_KEY,
+ (void *)lockinfo_buf, len);
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = -op_ret;
+ goto unwind;
}
- dict_unref (lockinfo);
+ unwind:
+ AFR_STACK_UNWIND(getxattr, frame, op_ret, op_errno, newdict,
+ local->xdata_rsp);
+ }
- return 0;
+ dict_unref(lockinfo);
+
+ return 0;
}
int32_t
-afr_fgetxattr_lockinfo_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *dict, dict_t *xdata)
+afr_fgetxattr_lockinfo_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
- int call_cnt = 0, len = 0;
- char *lockinfo_buf = NULL;
- dict_t *lockinfo = NULL, *newdict = NULL;
- afr_local_t *local = NULL;
-
- LOCK (&frame->lock);
- {
- local = frame->local;
+ int call_cnt = 0, len = 0;
+ char *lockinfo_buf = NULL;
+ dict_t *lockinfo = NULL, *newdict = NULL;
+ afr_local_t *local = NULL;
- call_cnt = --local->call_count;
+ LOCK(&frame->lock);
+ {
+ local = frame->local;
- if ((op_ret < 0) || (!dict && !xdata)) {
- goto unlock;
- }
+ call_cnt = --local->call_count;
- if (xdata) {
- if (!local->xdata_rsp) {
- local->xdata_rsp = dict_new ();
- if (!local->xdata_rsp) {
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- goto unlock;
- }
- }
- }
+ if ((op_ret < 0) || (!dict && !xdata)) {
+ goto unlock;
+ }
- if (!dict) {
- goto unlock;
+ if (xdata) {
+ if (!local->xdata_rsp) {
+ local->xdata_rsp = dict_new();
+ if (!local->xdata_rsp) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unlock;
}
+ }
+ }
- op_ret = dict_get_ptr_and_len (dict, GF_XATTR_LOCKINFO_KEY,
- (void **)&lockinfo_buf, &len);
+ if (!dict) {
+ goto unlock;
+ }
- if (!lockinfo_buf) {
- goto unlock;
- }
+ op_ret = dict_get_ptr_and_len(dict, GF_XATTR_LOCKINFO_KEY,
+ (void **)&lockinfo_buf, &len);
- if (!local->dict) {
- local->dict = dict_new ();
- if (!local->dict) {
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- goto unlock;
- }
- }
- }
-unlock:
- UNLOCK (&frame->lock);
-
- if (lockinfo_buf != NULL) {
- lockinfo = dict_new ();
- if (lockinfo == NULL) {
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- } else {
- op_ret = dict_unserialize (lockinfo_buf, len,
- &lockinfo);
-
- if (lockinfo && local->dict) {
- dict_copy (lockinfo, local->dict);
- }
- }
+ if (!lockinfo_buf) {
+ goto unlock;
}
- if (xdata && local->xdata_rsp) {
- dict_copy (xdata, local->xdata_rsp);
+ if (!local->dict) {
+ local->dict = dict_new();
+ if (!local->dict) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unlock;
+ }
}
+ }
+unlock:
+ UNLOCK(&frame->lock);
- if (!call_cnt) {
- newdict = dict_new ();
- if (!newdict) {
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- goto unwind;
- }
-
- len = dict_serialized_length (local->dict);
- if (len <= 0) {
- goto unwind;
- }
-
- lockinfo_buf = GF_CALLOC (1, len, gf_common_mt_char);
- if (!lockinfo_buf) {
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- goto unwind;
- }
+ if (lockinfo_buf != NULL) {
+ lockinfo = dict_new();
+ if (lockinfo == NULL) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ } else {
+ op_ret = dict_unserialize(lockinfo_buf, len, &lockinfo);
- op_ret = dict_serialize (local->dict, lockinfo_buf);
- if (op_ret < 0) {
- local->op_ret = -1;
- local->op_errno = -op_ret;
- }
+ if (lockinfo && local->dict) {
+ dict_copy(lockinfo, local->dict);
+ }
+ }
+ }
+
+ if (xdata && local->xdata_rsp) {
+ dict_copy(xdata, local->xdata_rsp);
+ }
+
+ if (!call_cnt) {
+ newdict = dict_new();
+ if (!newdict) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unwind;
+ }
- op_ret = dict_set_dynptr (newdict, GF_XATTR_LOCKINFO_KEY,
- (void *)lockinfo_buf, len);
- if (op_ret < 0) {
- local->op_ret = -1;
- local->op_errno = -op_ret;
- goto unwind;
- }
+ op_ret = dict_allocate_and_serialize(
+ local->dict, (char **)&lockinfo_buf, (unsigned int *)&len);
+ if (op_ret != 0) {
+ local->op_ret = -1;
+ goto unwind;
+ }
- unwind:
- AFR_STACK_UNWIND (fgetxattr, frame, op_ret,
- op_errno, newdict,
- local->xdata_rsp);
+ op_ret = dict_set_dynptr(newdict, GF_XATTR_LOCKINFO_KEY,
+ (void *)lockinfo_buf, len);
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = -op_ret;
+ goto unwind;
}
- dict_unref (lockinfo);
+ unwind:
+ AFR_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, newdict,
+ local->xdata_rsp);
+ }
- return 0;
+ dict_unref(lockinfo);
+
+ return 0;
}
int32_t
-afr_fgetxattr_pathinfo_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *dict, dict_t *xdata)
+afr_fgetxattr_pathinfo_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
- afr_local_t *local = NULL;
- int32_t callcnt = 0;
- int ret = 0;
- char *xattr = NULL;
- char *xattr_serz = NULL;
- char xattr_cky[1024] = {0,};
- dict_t *nxattr = NULL;
- long cky = 0;
- int32_t padding = 0;
- int32_t tlen = 0;
-
- if (!frame || !frame->local || !this) {
- gf_log ("", GF_LOG_ERROR, "possible NULL deref");
- goto out;
+ afr_local_t *local = NULL;
+ int32_t callcnt = 0;
+ int ret = 0;
+ char *xattr = NULL;
+ char *xattr_serz = NULL;
+ int keylen = 0;
+ char xattr_cky[1024] = {
+ 0,
+ };
+ int xattr_cky_len = 0;
+ dict_t *nxattr = NULL;
+ long cky = 0;
+ int32_t padding = 0;
+ int32_t tlen = 0;
+
+ if (!frame || !frame->local || !this) {
+ gf_msg("", GF_LOG_ERROR, 0, AFR_MSG_INVALID_ARG, "possible NULL deref");
+ goto out;
+ }
+
+ local = frame->local;
+ cky = (long)cookie;
+ keylen = strlen(local->cont.getxattr.name);
+ xattr_cky_len = snprintf(xattr_cky, sizeof(xattr_cky), "%s-%ld",
+ local->cont.getxattr.name, cky);
+ LOCK(&frame->lock);
+ {
+ callcnt = --local->call_count;
+
+ if (op_ret < 0) {
+ local->op_errno = op_errno;
+ } else {
+ local->op_ret = op_ret;
+ if (!local->xdata_rsp && xdata)
+ local->xdata_rsp = dict_ref(xdata);
}
- local = frame->local;
- cky = (long) cookie;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret < 0) {
- local->op_errno = op_errno;
- } else {
- local->op_ret = op_ret;
- if (!local->xdata_rsp && xdata)
- local->xdata_rsp = dict_ref (xdata);
- }
+ if (!dict || (op_ret < 0))
+ goto unlock;
- if (!dict || (op_ret < 0))
- goto unlock;
-
- if (!local->dict)
- local->dict = dict_new ();
-
- if (local->dict) {
- ret = dict_get_str (dict,
- local->cont.getxattr.name,
- &xattr);
- if (ret)
- goto unlock;
-
- xattr = gf_strdup (xattr);
-
- (void)snprintf (xattr_cky, 1024, "%s-%ld",
- local->cont.getxattr.name, cky);
- ret = dict_set_dynstr (local->dict,
- xattr_cky, xattr);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "Cannot set xattr cookie key");
- goto unlock;
- }
-
- local->cont.getxattr.xattr_len
- += strlen (xattr) + 1;
- }
+ if (!local->dict) {
+ local->dict = dict_new();
+ if (!local->dict)
+ goto unlock;
+ }
+ ret = dict_get_strn(dict, local->cont.getxattr.name, keylen, &xattr);
+ if (ret)
+ goto unlock;
+
+ xattr = gf_strdup(xattr);
+
+ ret = dict_set_dynstrn(local->dict, xattr_cky, xattr_cky_len, xattr);
+ if (ret) {
+ UNLOCK(&frame->lock);
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Cannot set xattr cookie key");
+ goto post_unlock;
}
-unlock:
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- if (!local->cont.getxattr.xattr_len)
- goto unwind;
-
- nxattr = dict_new ();
- if (!nxattr)
- goto unwind;
-
- /* extra bytes for decorations (brackets and <>'s) */
- padding += strlen (this->name)
- + strlen (AFR_PATHINFO_HEADER) + 4;
- local->cont.getxattr.xattr_len += (padding + 2);
-
- xattr_serz = GF_CALLOC (local->cont.getxattr.xattr_len,
- sizeof (char), gf_common_mt_char);
-
- if (!xattr_serz)
- goto unwind;
-
- /* the xlator info */
- (void) sprintf (xattr_serz, "(<"AFR_PATHINFO_HEADER"%s> ",
- this->name);
-
- /* actual series of pathinfo */
- ret = dict_serialize_value_with_delim (local->dict,
- xattr_serz
- + strlen (xattr_serz),
- &tlen, ' ');
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Error serializing"
- " dictionary");
- goto unwind;
- }
- /* closing part */
- *(xattr_serz + padding + tlen) = ')';
- *(xattr_serz + padding + tlen + 1) = '\0';
+ local->cont.getxattr.xattr_len += strlen(xattr) + 1;
+ }
+unlock:
+ UNLOCK(&frame->lock);
+post_unlock:
+ if (!callcnt) {
+ if (!local->cont.getxattr.xattr_len)
+ goto unwind;
+
+ nxattr = dict_new();
+ if (!nxattr)
+ goto unwind;
+
+ /* extra bytes for decorations (brackets and <>'s) */
+ padding += strlen(this->name) + SLEN(AFR_PATHINFO_HEADER) + 4;
+ local->cont.getxattr.xattr_len += (padding + 2);
+
+ xattr_serz = GF_MALLOC(local->cont.getxattr.xattr_len,
+ gf_common_mt_char);
+
+ if (!xattr_serz)
+ goto unwind;
+
+ /* the xlator info */
+ int xattr_serz_len = sprintf(
+ xattr_serz, "(<" AFR_PATHINFO_HEADER "%s> ", this->name);
+
+ /* actual series of pathinfo */
+ ret = dict_serialize_value_with_delim(
+ local->dict, xattr_serz + xattr_serz_len, &tlen, ' ');
+ if (ret) {
+ GF_FREE(xattr_serz);
+ goto unwind;
+ }
- ret = dict_set_dynstr (nxattr, local->cont.getxattr.name,
- xattr_serz);
- if (ret)
- gf_log (this->name, GF_LOG_ERROR, "Cannot set pathinfo"
- " key in dict");
+ /* closing part */
+ *(xattr_serz + padding + tlen) = ')';
+ *(xattr_serz + padding + tlen + 1) = '\0';
+
+ ret = dict_set_dynstrn(nxattr, local->cont.getxattr.name, keylen,
+ xattr_serz);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Cannot set pathinfo key in dict");
+ if (ret == -EINVAL)
+ GF_FREE(xattr_serz);
+ }
- unwind:
- AFR_STACK_UNWIND (fgetxattr, frame, local->op_ret,
- local->op_errno, nxattr, local->xdata_rsp);
+ unwind:
+ AFR_STACK_UNWIND(fgetxattr, frame, local->op_ret, local->op_errno,
+ nxattr, local->xdata_rsp);
- if (nxattr)
- dict_unref (nxattr);
- }
+ if (nxattr)
+ dict_unref(nxattr);
+ }
out:
- return ret;
+ return ret;
}
int32_t
-afr_getxattr_pathinfo_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *dict, dict_t *xdata)
+afr_getxattr_pathinfo_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
- afr_local_t *local = NULL;
- int32_t callcnt = 0;
- int ret = 0;
- char *xattr = NULL;
- char *xattr_serz = NULL;
- char xattr_cky[1024] = {0,};
- dict_t *nxattr = NULL;
- long cky = 0;
- int32_t padding = 0;
- int32_t tlen = 0;
-
- if (!frame || !frame->local || !this) {
- gf_log ("", GF_LOG_ERROR, "possible NULL deref");
- goto out;
+ afr_local_t *local = NULL;
+ int32_t callcnt = 0;
+ int ret = 0;
+ char *xattr = NULL;
+ char *xattr_serz = NULL;
+ char xattr_cky[1024] = {
+ 0,
+ };
+ int keylen = 0;
+ int xattr_cky_len = 0;
+ dict_t *nxattr = NULL;
+ long cky = 0;
+ int32_t padding = 0;
+ int32_t tlen = 0;
+
+ if (!frame || !frame->local || !this) {
+ gf_msg("", GF_LOG_ERROR, 0, AFR_MSG_INVALID_ARG, "possible NULL deref");
+ goto out;
+ }
+
+ local = frame->local;
+ cky = (long)cookie;
+ keylen = strlen(local->cont.getxattr.name);
+ xattr_cky_len = snprintf(xattr_cky, sizeof(xattr_cky), "%s-%ld",
+ local->cont.getxattr.name, cky);
+ LOCK(&frame->lock);
+ {
+ callcnt = --local->call_count;
+
+ if (op_ret < 0) {
+ local->op_errno = op_errno;
+ } else {
+ local->op_ret = op_ret;
+ if (!local->xdata_rsp && xdata)
+ local->xdata_rsp = dict_ref(xdata);
}
- local = frame->local;
- cky = (long) cookie;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret < 0) {
- local->op_errno = op_errno;
- } else {
- local->op_ret = op_ret;
- if (!local->xdata_rsp && xdata)
- local->xdata_rsp = dict_ref (xdata);
- }
-
- if (!dict || (op_ret < 0))
- goto unlock;
-
- if (!local->dict)
- local->dict = dict_new ();
-
- if (local->dict) {
- ret = dict_get_str (dict,
- local->cont.getxattr.name,
- &xattr);
- if (ret)
- goto unlock;
-
- xattr = gf_strdup (xattr);
-
- (void)snprintf (xattr_cky, 1024, "%s-%ld",
- local->cont.getxattr.name, cky);
- ret = dict_set_dynstr (local->dict,
- xattr_cky, xattr);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "Cannot set xattr cookie key");
- goto unlock;
- }
-
- local->cont.getxattr.xattr_len += strlen (xattr) + 1;
- }
- }
-unlock:
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- if (!local->cont.getxattr.xattr_len)
- goto unwind;
-
- nxattr = dict_new ();
- if (!nxattr)
- goto unwind;
-
- /* extra bytes for decorations (brackets and <>'s) */
- padding += strlen (this->name) + strlen (AFR_PATHINFO_HEADER) + 4;
- local->cont.getxattr.xattr_len += (padding + 2);
-
- xattr_serz = GF_CALLOC (local->cont.getxattr.xattr_len,
- sizeof (char), gf_common_mt_char);
-
- if (!xattr_serz)
- goto unwind;
-
- /* the xlator info */
- (void) sprintf (xattr_serz, "(<"AFR_PATHINFO_HEADER"%s> ",
- this->name);
-
- /* actual series of pathinfo */
- ret = dict_serialize_value_with_delim (local->dict,
- xattr_serz + strlen (xattr_serz),
- &tlen, ' ');
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Error serializing"
- " dictionary");
- goto unwind;
- }
-
- /* closing part */
- *(xattr_serz + padding + tlen) = ')';
- *(xattr_serz + padding + tlen + 1) = '\0';
+ if (!dict || (op_ret < 0))
+ goto unlock;
- ret = dict_set_dynstr (nxattr, local->cont.getxattr.name,
- xattr_serz);
- if (ret)
- gf_log (this->name, GF_LOG_ERROR, "Cannot set pathinfo"
- " key in dict");
+ if (!local->dict) {
+ local->dict = dict_new();
+ if (!local->dict)
+ goto unlock;
+ }
+ ret = dict_get_strn(dict, local->cont.getxattr.name, keylen, &xattr);
+ if (ret)
+ goto unlock;
+
+ xattr = gf_strdup(xattr);
+
+ ret = dict_set_dynstrn(local->dict, xattr_cky, xattr_cky_len, xattr);
+ if (ret) {
+ UNLOCK(&frame->lock);
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Cannot set xattr cookie key");
+ goto post_unlock;
+ }
- unwind:
- AFR_STACK_UNWIND (getxattr, frame, local->op_ret,
- local->op_errno, nxattr, local->xdata_rsp);
+ local->cont.getxattr.xattr_len += strlen(xattr) + 1;
+ }
+unlock:
+ UNLOCK(&frame->lock);
+post_unlock:
+ if (!callcnt) {
+ if (!local->cont.getxattr.xattr_len)
+ goto unwind;
+
+ nxattr = dict_new();
+ if (!nxattr)
+ goto unwind;
+
+ /* extra bytes for decorations (brackets and <>'s) */
+ padding += strlen(this->name) + SLEN(AFR_PATHINFO_HEADER) + 4;
+ local->cont.getxattr.xattr_len += (padding + 2);
+
+ xattr_serz = GF_MALLOC(local->cont.getxattr.xattr_len,
+ gf_common_mt_char);
+
+ if (!xattr_serz)
+ goto unwind;
+
+ /* the xlator info */
+ int xattr_serz_len = sprintf(
+ xattr_serz, "(<" AFR_PATHINFO_HEADER "%s> ", this->name);
+
+ /* actual series of pathinfo */
+ ret = dict_serialize_value_with_delim(
+ local->dict, xattr_serz + xattr_serz_len, &tlen, ' ');
+ if (ret) {
+ GF_FREE(xattr_serz);
+ goto unwind;
+ }
- if (nxattr)
- dict_unref (nxattr);
+ /* closing part */
+ *(xattr_serz + padding + tlen) = ')';
+ *(xattr_serz + padding + tlen + 1) = '\0';
+
+ ret = dict_set_dynstrn(nxattr, local->cont.getxattr.name, keylen,
+ xattr_serz);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Cannot set pathinfo key in dict");
+ if (ret == -EINVAL)
+ GF_FREE(xattr_serz);
}
+ unwind:
+ AFR_STACK_UNWIND(getxattr, frame, local->op_ret, local->op_errno,
+ nxattr, local->xdata_rsp);
+
+ if (nxattr)
+ dict_unref(nxattr);
+ }
+
out:
- return ret;
+ return ret;
}
static int
-afr_aggregate_stime_xattr (dict_t *this, char *key, data_t *value, void *data)
+afr_aggregate_stime_xattr(dict_t *this, char *key, data_t *value, void *data)
{
- int ret = 0;
+ int ret = 0;
- if (fnmatch (GF_XATTR_STIME_PATTERN, key, FNM_NOESCAPE) == 0)
- ret = gf_get_max_stime (THIS, data, key, value);
+ if (fnmatch(GF_XATTR_STIME_PATTERN, key, FNM_NOESCAPE) == 0)
+ ret = gf_get_max_stime(THIS, data, key, value);
- return ret;
+ return ret;
}
int32_t
-afr_common_getxattr_stime_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *dict, dict_t *xdata)
+afr_common_getxattr_stime_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
- afr_local_t *local = NULL;
- int32_t callcnt = 0;
-
- if (!frame || !frame->local || !this) {
- gf_log ("", GF_LOG_ERROR, "possible NULL deref");
- goto out;
- }
+ afr_local_t *local = NULL;
+ int32_t callcnt = 0;
- local = frame->local;
+ if (!frame || !frame->local || !this) {
+ gf_msg("", GF_LOG_ERROR, 0, AFR_MSG_INVALID_ARG, "possible NULL deref");
+ goto out;
+ }
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
+ local = frame->local;
- if (!dict || (op_ret < 0)) {
- local->op_errno = op_errno;
- goto cleanup;
- }
+ LOCK(&frame->lock);
+ {
+ callcnt = --local->call_count;
- if (!local->dict)
- local->dict = dict_copy_with_ref (dict, NULL);
- else
- dict_foreach (dict, afr_aggregate_stime_xattr,
- local->dict);
- local->op_ret = 0;
+ if (!dict || (op_ret < 0)) {
+ local->op_errno = op_errno;
+ goto cleanup;
}
+ if (!local->dict)
+ local->dict = dict_copy_with_ref(dict, NULL);
+ else
+ dict_foreach(dict, afr_aggregate_stime_xattr, local->dict);
+ local->op_ret = 0;
+ }
+
cleanup:
- UNLOCK (&frame->lock);
+ UNLOCK(&frame->lock);
- if (!callcnt) {
- AFR_STACK_UNWIND (getxattr, frame, local->op_ret,
- local->op_errno, local->dict, xdata);
- }
+ if (!callcnt) {
+ AFR_STACK_UNWIND(getxattr, frame, local->op_ret, local->op_errno,
+ local->dict, xdata);
+ }
out:
- return 0;
+ return 0;
}
-
static gf_boolean_t
-afr_is_special_xattr (const char *name, fop_getxattr_cbk_t *cbk,
- gf_boolean_t is_fgetxattr)
+afr_is_special_xattr(const char *name, fop_getxattr_cbk_t *cbk,
+ gf_boolean_t is_fgetxattr)
{
- gf_boolean_t is_spl = _gf_true;
-
- GF_ASSERT (cbk);
- if (!cbk || !name) {
- is_spl = _gf_false;
- goto out;
+ gf_boolean_t is_spl = _gf_true;
+
+ GF_ASSERT(cbk);
+ if (!cbk || !name) {
+ is_spl = _gf_false;
+ goto out;
+ }
+
+ if (!strcmp(name, GF_XATTR_PATHINFO_KEY) ||
+ !strcmp(name, GF_XATTR_USER_PATHINFO_KEY)) {
+ if (is_fgetxattr) {
+ *cbk = afr_fgetxattr_pathinfo_cbk;
+ } else {
+ *cbk = afr_getxattr_pathinfo_cbk;
}
-
- if (!strcmp (name, GF_XATTR_PATHINFO_KEY) ||
- !strcmp (name, GF_XATTR_USER_PATHINFO_KEY)) {
- if (is_fgetxattr) {
- *cbk = afr_fgetxattr_pathinfo_cbk;
- } else {
- *cbk = afr_getxattr_pathinfo_cbk;
- }
- } else if (!strncmp (name, GF_XATTR_CLRLK_CMD,
- strlen (GF_XATTR_CLRLK_CMD))) {
- if (is_fgetxattr) {
- *cbk = afr_fgetxattr_clrlk_cbk;
- } else {
- *cbk = afr_getxattr_clrlk_cbk;
- }
- } else if (!strncmp (name, GF_XATTR_LOCKINFO_KEY,
- strlen (GF_XATTR_LOCKINFO_KEY))) {
- if (is_fgetxattr) {
- *cbk = afr_fgetxattr_lockinfo_cbk;
- } else {
- *cbk = afr_getxattr_lockinfo_cbk;
- }
- } else if (fnmatch (GF_XATTR_STIME_PATTERN, name, FNM_NOESCAPE) == 0) {
- *cbk = afr_common_getxattr_stime_cbk;
- } else if (strcmp (name, QUOTA_SIZE_KEY) == 0) {
- *cbk = afr_getxattr_quota_size_cbk;
+ } else if (!strncmp(name, GF_XATTR_CLRLK_CMD, SLEN(GF_XATTR_CLRLK_CMD))) {
+ if (is_fgetxattr) {
+ *cbk = afr_fgetxattr_clrlk_cbk;
} else {
- is_spl = _gf_false;
+ *cbk = afr_getxattr_clrlk_cbk;
}
+ } else if (!strncmp(name, GF_XATTR_LOCKINFO_KEY,
+ SLEN(GF_XATTR_LOCKINFO_KEY))) {
+ if (is_fgetxattr) {
+ *cbk = afr_fgetxattr_lockinfo_cbk;
+ } else {
+ *cbk = afr_getxattr_lockinfo_cbk;
+ }
+ } else if (fnmatch(GF_XATTR_STIME_PATTERN, name, FNM_NOESCAPE) == 0) {
+ *cbk = afr_common_getxattr_stime_cbk;
+ } else if (strcmp(name, QUOTA_SIZE_KEY) == 0) {
+ *cbk = afr_getxattr_quota_size_cbk;
+ } else if (!strcmp(name, GF_XATTR_LIST_NODE_UUIDS_KEY)) {
+ *cbk = afr_getxattr_list_node_uuids_cbk;
+ } else {
+ is_spl = _gf_false;
+ }
out:
- return is_spl;
+ return is_spl;
}
static void
-afr_getxattr_all_subvols (xlator_t *this, call_frame_t *frame,
- const char *name, loc_t *loc,
- fop_getxattr_cbk_t cbk)
+afr_getxattr_all_subvols(xlator_t *this, call_frame_t *frame, const char *name,
+ loc_t *loc, fop_getxattr_cbk_t cbk)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int i = 0;
- int call_count = 0;
-
- priv = this->private;
-
- local = frame->local;
- //local->call_count set in afr_local_init
- call_count = local->call_count;
-
- //If up-children count is 0, afr_local_init would have failed already
- //and the call would have unwound so not handling it here.
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, cbk,
- (void *) (long) i, priv->children[i],
- priv->children[i]->fops->getxattr,
- loc, name, NULL);
- if (!--call_count)
- break;
- }
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int i = 0;
+ int call_count = 0;
+
+ priv = this->private;
+
+ local = frame->local;
+ // local->call_count set in afr_local_init
+ call_count = local->call_count;
+
+ if (!strcmp(name, GF_XATTR_LIST_NODE_UUIDS_KEY)) {
+ GF_FREE(local->cont.getxattr.name);
+ local->cont.getxattr.name = gf_strdup(GF_XATTR_NODE_UUID_KEY);
+ }
+
+ // If up-children count is 0, afr_local_init would have failed already
+ // and the call would have unwound so not handling it here.
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE(frame, cbk, (void *)(long)i, priv->children[i],
+ priv->children[i]->fops->getxattr, loc,
+ local->cont.getxattr.name, NULL);
+ if (!--call_count)
+ break;
}
- return;
+ }
+ return;
}
int
-afr_marker_populate_args (call_frame_t *frame, int type, int *gauge,
- xlator_t **subvols)
+afr_marker_populate_args(call_frame_t *frame, int type, int *gauge,
+ xlator_t **subvols)
{
- xlator_t *this = frame->this;
- afr_private_t *priv = this->private;
+ xlator_t *this = frame->this;
+ afr_private_t *priv = this->private;
- memcpy (subvols, priv->children, sizeof (*subvols) * priv->child_count);
+ memcpy(subvols, priv->children, sizeof(*subvols) * priv->child_count);
- if (type == MARKER_XTIME_TYPE) {
- /*Don't error out on ENOENT/ENOTCONN */
- gauge[MCNT_NOTFOUND] = 0;
- gauge[MCNT_ENOTCONN] = 0;
- }
- return priv->child_count;
+ if (type == MARKER_XTIME_TYPE) {
+ /*Don't error out on ENOENT/ENOTCONN */
+ gauge[MCNT_NOTFOUND] = 0;
+ gauge[MCNT_ENOTCONN] = 0;
+ }
+ return priv->child_count;
}
-int32_t
-afr_getxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name, dict_t *xdata)
+static int
+afr_handle_heal_xattrs(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *heal_op)
{
- afr_private_t *priv = NULL;
- xlator_t **children = NULL;
- afr_local_t *local = NULL;
- int i = 0;
- int32_t op_errno = 0;
- int ret = -1;
- fop_getxattr_cbk_t cbk = NULL;
+ int ret = -1;
+ afr_spb_status_t *data = NULL;
+ if (!strcmp(heal_op, GF_HEAL_INFO)) {
+ afr_get_heal_info(frame, this, loc);
+ ret = 0;
+ goto out;
+ }
- local = AFR_FRAME_INIT (frame, op_errno);
- if (!local)
- goto out;
+ if (!strcmp(heal_op, GF_AFR_HEAL_SBRAIN)) {
+ afr_heal_splitbrain_file(frame, this, loc);
+ ret = 0;
+ goto out;
+ }
+
+ if (!strcmp(heal_op, GF_AFR_SBRAIN_STATUS)) {
+ data = GF_CALLOC(1, sizeof(*data), gf_afr_mt_spb_status_t);
+ if (!data) {
+ ret = 1;
+ goto out;
+ }
+ data->frame = frame;
+ data->loc = loc;
+ ret = synctask_new(this->ctx->env, afr_get_split_brain_status,
+ afr_get_split_brain_status_cbk, NULL, data);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN_STATUS,
+ "Failed to create"
+ " synctask. Unable to fetch split-brain status"
+ " for %s.",
+ loc->name);
+ ret = 1;
+ goto out;
+ }
+ goto out;
+ }
- priv = this->private;
+out:
+ if (ret == 1) {
+ AFR_STACK_UNWIND(getxattr, frame, -1, ENOMEM, NULL, NULL);
+ if (data)
+ GF_FREE(data);
+ ret = 0;
+ }
+ return ret;
+}
- children = priv->children;
+int32_t
+afr_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name,
+ dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ xlator_t **children = NULL;
+ int i = 0;
+ int32_t op_errno = 0;
+ int ret = -1;
+ fop_getxattr_cbk_t cbk = NULL;
- loc_copy (&local->loc, loc);
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
- local->op = GF_FOP_GETXATTR;
+ priv = this->private;
- if (xdata)
- local->xdata_req = dict_ref (xdata);
+ children = priv->children;
- if (!name)
- goto no_name;
+ loc_copy(&local->loc, loc);
- local->cont.getxattr.name = gf_strdup (name);
+ local->op = GF_FOP_GETXATTR;
- if (!local->cont.getxattr.name) {
- op_errno = ENOMEM;
- goto out;
- }
+ if (xdata)
+ local->xdata_req = dict_ref(xdata);
- if (!strncmp (name, AFR_XATTR_PREFIX,
- strlen (AFR_XATTR_PREFIX))) {
- gf_log (this->name, GF_LOG_INFO,
- "%s: no data present for key %s",
- loc->path, name);
- op_errno = ENODATA;
- goto out;
- }
+ if (!name)
+ goto no_name;
- if (cluster_handle_marker_getxattr (frame, loc, name, priv->vol_uuid,
- afr_getxattr_unwind,
- afr_marker_populate_args) == 0)
- return 0;
+ local->cont.getxattr.name = gf_strdup(name);
- if (!strcmp (name, GF_HEAL_INFO)) {
- afr_get_heal_info (frame, this, loc, xdata);
- return 0;
- }
+ if (!local->cont.getxattr.name) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- if (!strcmp (name, GF_AFR_HEAL_SBRAIN)) {
- afr_heal_splitbrain_file (frame, this, loc);
- return 0;
- }
+ if (!strncmp(name, AFR_XATTR_PREFIX, SLEN(AFR_XATTR_PREFIX))) {
+ op_errno = ENODATA;
+ goto out;
+ }
- if (!strcmp (name, GF_AFR_SBRAIN_STATUS)) {
- afr_get_split_brain_status (frame, this, loc);
- return 0;
- }
- /*
- * Special xattrs which need responses from all subvols
- */
- if (afr_is_special_xattr (name, &cbk, 0)) {
- afr_getxattr_all_subvols (this, frame, name, loc, cbk);
- return 0;
- }
+ if (cluster_handle_marker_getxattr(frame, loc, name, priv->vol_uuid,
+ afr_getxattr_unwind,
+ afr_marker_populate_args) == 0)
+ return 0;
- if (XATTR_IS_NODE_UUID (name)) {
- i = 0;
- STACK_WIND_COOKIE (frame, afr_getxattr_node_uuid_cbk,
- (void *) (long) i,
- children[i],
- children[i]->fops->getxattr,
- loc, name, xdata);
- return 0;
- }
+ ret = afr_handle_heal_xattrs(frame, this, &local->loc, name);
+ if (ret == 0)
+ return 0;
+
+ /*
+ * Heal daemons don't have IO threads ... and as a result they
+ * send this getxattr down and eventually crash :(
+ */
+ op_errno = -1;
+ GF_CHECK_XATTR_KEY_AND_GOTO(name, IO_THREADS_QUEUE_SIZE_KEY, op_errno, out);
+
+ /*
+ * Special xattrs which need responses from all subvols
+ */
+ if (afr_is_special_xattr(name, &cbk, 0)) {
+ afr_getxattr_all_subvols(this, frame, name, loc, cbk);
+ return 0;
+ }
+
+ if (XATTR_IS_NODE_UUID(name)) {
+ i = 0;
+ STACK_WIND_COOKIE(frame, afr_getxattr_node_uuid_cbk, (void *)(long)i,
+ children[i], children[i]->fops->getxattr, loc, name,
+ xdata);
+ return 0;
+ }
no_name:
- afr_read_txn (frame, this, local->loc.inode, afr_getxattr_wind,
- AFR_METADATA_TRANSACTION);
+ afr_read_txn(frame, this, local->loc.inode, afr_getxattr_wind,
+ AFR_METADATA_TRANSACTION);
- ret = 0;
+ ret = 0;
out:
- if (ret < 0)
- AFR_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL, NULL);
- return 0;
+ if (ret < 0)
+ AFR_STACK_UNWIND(getxattr, frame, -1, op_errno, NULL, NULL);
+ return 0;
}
/* {{{ fgetxattr */
-
int32_t
-afr_fgetxattr_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *dict, dict_t *xdata)
+afr_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
- afr_local_t *local = NULL;
+ afr_local_t *local = NULL;
- local = frame->local;
+ local = frame->local;
- if (op_ret < 0) {
- local->op_ret = -1;
- local->op_errno = op_errno;
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
- afr_read_txn_continue (frame, this, (long) cookie);
- return 0;
- }
+ afr_read_txn_continue(frame, this, (long)cookie);
+ return 0;
+ }
- if (dict)
- afr_filter_xattrs (dict);
+ if (dict)
+ afr_filter_xattrs(dict);
- AFR_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, dict, xdata);
+ AFR_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, dict, xdata);
- return 0;
+ return 0;
}
int
-afr_fgetxattr_wind (call_frame_t *frame, xlator_t *this, int subvol)
+afr_fgetxattr_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- local = frame->local;
- priv = this->private;
-
- if (subvol == -1) {
- AFR_STACK_UNWIND (fgetxattr, frame, local->op_ret,
- local->op_errno, NULL, NULL);
- return 0;
- }
-
- STACK_WIND_COOKIE (frame, afr_fgetxattr_cbk, (void *) (long) subvol,
- priv->children[subvol],
- priv->children[subvol]->fops->fgetxattr,
- local->fd, local->cont.getxattr.name,
- local->xdata_req);
- return 0;
-}
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ local = frame->local;
+ priv = this->private;
-static void
-afr_fgetxattr_all_subvols (xlator_t *this, call_frame_t *frame,
- fop_fgetxattr_cbk_t cbk)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int i = 0;
- int call_count = 0;
+ if (subvol == -1) {
+ AFR_STACK_UNWIND(fgetxattr, frame, local->op_ret, local->op_errno, NULL,
+ NULL);
+ return 0;
+ }
- priv = this->private;
+ STACK_WIND_COOKIE(frame, afr_fgetxattr_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->fgetxattr, local->fd,
+ local->cont.getxattr.name, local->xdata_req);
+ return 0;
+}
- local = frame->local;
- //local->call_count set in afr_local_init
- call_count = local->call_count;
-
- //If up-children count is 0, afr_local_init would have failed already
- //and the call would have unwound so not handling it here.
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fgetxattr,
- local->fd, local->cont.getxattr.name,
- NULL);
- if (!--call_count)
- break;
- }
+static void
+afr_fgetxattr_all_subvols(xlator_t *this, call_frame_t *frame,
+ fop_fgetxattr_cbk_t cbk)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int i = 0;
+ int call_count = 0;
+
+ priv = this->private;
+
+ local = frame->local;
+ // local->call_count set in afr_local_init
+ call_count = local->call_count;
+
+ // If up-children count is 0, afr_local_init would have failed already
+ // and the call would have unwound so not handling it here.
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE(frame, cbk, (void *)(long)i, priv->children[i],
+ priv->children[i]->fops->fgetxattr, local->fd,
+ local->cont.getxattr.name, NULL);
+ if (!--call_count)
+ break;
}
+ }
- return;
+ return;
}
-
int
-afr_fgetxattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, const char *name, dict_t *xdata)
+afr_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
+ dict_t *xdata)
{
- afr_local_t *local = NULL;
- int32_t op_errno = 0;
- fop_fgetxattr_cbk_t cbk = NULL;
-
- local = AFR_FRAME_INIT (frame, op_errno);
- if (!local)
- goto out;
-
- local->op = GF_FOP_FGETXATTR;
- local->fd = fd_ref (fd);
- if (name) {
- local->cont.getxattr.name = gf_strdup (name);
- if (!local->cont.getxattr.name) {
- op_errno = ENOMEM;
- goto out;
- }
- }
- if (xdata)
- local->xdata_req = dict_ref (xdata);
-
- /* pathinfo gets handled only in getxattr(), but we need to handle
- * lockinfo.
- * If we are doing fgetxattr with lockinfo as the key then we
- * collect information from all children.
- */
- if (afr_is_special_xattr (name, &cbk, 1)) {
- afr_fgetxattr_all_subvols (this, frame, cbk);
- return 0;
+ afr_local_t *local = NULL;
+ int32_t op_errno = 0;
+ fop_fgetxattr_cbk_t cbk = NULL;
+
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
+
+ local->op = GF_FOP_FGETXATTR;
+ local->fd = fd_ref(fd);
+ if (name) {
+ local->cont.getxattr.name = gf_strdup(name);
+ if (!local->cont.getxattr.name) {
+ op_errno = ENOMEM;
+ goto out;
}
+ }
+ if (xdata)
+ local->xdata_req = dict_ref(xdata);
+
+ /* pathinfo gets handled only in getxattr(), but we need to handle
+ * lockinfo.
+ * If we are doing fgetxattr with lockinfo as the key then we
+ * collect information from all children.
+ */
+ if (afr_is_special_xattr(name, &cbk, 1)) {
+ afr_fgetxattr_all_subvols(this, frame, cbk);
+ return 0;
+ }
- afr_fix_open (fd, this);
+ afr_fix_open(fd, this);
- afr_read_txn (frame, this, fd->inode, afr_fgetxattr_wind,
- AFR_METADATA_TRANSACTION);
+ afr_read_txn(frame, this, fd->inode, afr_fgetxattr_wind,
+ AFR_METADATA_TRANSACTION);
- return 0;
+ return 0;
out:
- AFR_STACK_UNWIND (fgetxattr, frame, -1, op_errno, NULL, NULL);
+ AFR_STACK_UNWIND(fgetxattr, frame, -1, op_errno, NULL, NULL);
- return 0;
+ return 0;
}
-
/* }}} */
/* {{{ readv */
int
-afr_readv_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct iovec *vector, int32_t count, struct iatt *buf,
- struct iobref *iobref, dict_t *xdata)
+afr_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iovec *vector, int32_t count,
+ struct iatt *buf, struct iobref *iobref, dict_t *xdata)
{
- afr_local_t *local = NULL;
+ afr_local_t *local = NULL;
- local = frame->local;
+ local = frame->local;
- if (op_ret < 0) {
- local->op_ret = -1;
- local->op_errno = op_errno;
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
- afr_read_txn_continue (frame, this, (long) cookie);
- return 0;
- }
+ afr_read_txn_continue(frame, this, (long)cookie);
+ return 0;
+ }
- AFR_STACK_UNWIND (readv, frame, op_ret, op_errno,
- vector, count, buf, iobref, xdata);
- return 0;
+ AFR_STACK_UNWIND(readv, frame, op_ret, op_errno, vector, count, buf, iobref,
+ xdata);
+ return 0;
}
+int
+afr_readv_wind(call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ if (subvol == -1) {
+ AFR_STACK_UNWIND(readv, frame, local->op_ret, local->op_errno, 0, 0, 0,
+ 0, 0);
+ return 0;
+ }
+
+ STACK_WIND_COOKIE(
+ frame, afr_readv_cbk, (void *)(long)subvol, priv->children[subvol],
+ priv->children[subvol]->fops->readv, local->fd, local->cont.readv.size,
+ local->cont.readv.offset, local->cont.readv.flags, local->xdata_req);
+ return 0;
+}
int
-afr_readv_wind (call_frame_t *frame, xlator_t *this, int subvol)
+afr_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- local = frame->local;
- priv = this->private;
-
- if (subvol == -1) {
- AFR_STACK_UNWIND (readv, frame, local->op_ret, local->op_errno,
- 0, 0, 0, 0, 0);
- return 0;
- }
-
- STACK_WIND_COOKIE (frame, afr_readv_cbk, (void *) (long) subvol,
- priv->children[subvol],
- priv->children[subvol]->fops->readv,
- local->fd, local->cont.readv.size,
- local->cont.readv.offset, local->cont.readv.flags,
- local->xdata_req);
- return 0;
+ afr_local_t *local = NULL;
+ int32_t op_errno = 0;
+
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
+
+ local->op = GF_FOP_READ;
+ local->fd = fd_ref(fd);
+ local->cont.readv.size = size;
+ local->cont.readv.offset = offset;
+ local->cont.readv.flags = flags;
+ if (xdata)
+ local->xdata_req = dict_ref(xdata);
+
+ afr_fix_open(fd, this);
+
+ afr_read_txn(frame, this, fd->inode, afr_readv_wind, AFR_DATA_TRANSACTION);
+
+ return 0;
+out:
+ AFR_STACK_UNWIND(readv, frame, -1, op_errno, 0, 0, 0, 0, 0);
+
+ return 0;
}
+/* }}} */
+
+/* {{{ seek */
int
-afr_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset, uint32_t flags, dict_t *xdata)
+afr_seek_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, off_t offset, dict_t *xdata)
{
- afr_local_t * local = NULL;
- int32_t op_errno = 0;
+ afr_local_t *local = NULL;
- local = AFR_FRAME_INIT (frame, op_errno);
- if (!local)
- goto out;
+ local = frame->local;
- local->op = GF_FOP_READ;
- local->fd = fd_ref (fd);
- local->cont.readv.size = size;
- local->cont.readv.offset = offset;
- local->cont.readv.flags = flags;
- if (xdata)
- local->xdata_req = dict_ref (xdata);
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
- afr_fix_open (fd, this);
+ afr_read_txn_continue(frame, this, (long)cookie);
+ return 0;
+ }
+
+ AFR_STACK_UNWIND(seek, frame, op_ret, op_errno, offset, xdata);
+ return 0;
+}
- afr_read_txn (frame, this, fd->inode, afr_readv_wind,
- AFR_DATA_TRANSACTION);
+int
+afr_seek_wind(call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ local = frame->local;
+ priv = this->private;
+
+ if (subvol == -1) {
+ AFR_STACK_UNWIND(seek, frame, local->op_ret, local->op_errno, 0, NULL);
return 0;
-out:
- AFR_STACK_UNWIND(readv, frame, -1, op_errno, 0, 0, 0, 0, 0);
+ }
- return 0;
+ STACK_WIND_COOKIE(
+ frame, afr_seek_cbk, (void *)(long)subvol, priv->children[subvol],
+ priv->children[subvol]->fops->seek, local->fd, local->cont.seek.offset,
+ local->cont.seek.what, local->xdata_req);
+ return 0;
}
+int
+afr_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ gf_seek_what_t what, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int32_t op_errno = 0;
+
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
+
+ local->op = GF_FOP_SEEK;
+ local->fd = fd_ref(fd);
+ local->cont.seek.offset = offset;
+ local->cont.seek.what = what;
+ if (xdata)
+ local->xdata_req = dict_ref(xdata);
+
+ afr_fix_open(fd, this);
+
+ afr_read_txn(frame, this, fd->inode, afr_seek_wind, AFR_DATA_TRANSACTION);
+
+ return 0;
+out:
+ AFR_STACK_UNWIND(seek, frame, -1, op_errno, 0, NULL);
+
+ return 0;
+}
/* }}} */
diff --git a/xlators/cluster/afr/src/afr-inode-read.h b/xlators/cluster/afr/src/afr-inode-read.h
index d128134ef2a..8c982bc7e6f 100644
--- a/xlators/cluster/afr/src/afr-inode-read.h
+++ b/xlators/cluster/afr/src/afr-inode-read.h
@@ -12,34 +12,34 @@
#define __INODE_READ_H__
int32_t
-afr_access (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t mask, dict_t *xdata);
+afr_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
+ dict_t *xdata);
int32_t
-afr_stat (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *xdata);
+afr_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata);
int32_t
-afr_fstat (call_frame_t *frame, xlator_t *this,
- fd_t *fd, dict_t *xdata);
+afr_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata);
int32_t
-afr_readlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc, size_t size, dict_t *xdata);
+afr_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+ dict_t *xdata);
int32_t
-afr_readv (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata);
+afr_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata);
int32_t
-afr_getxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name, dict_t *xdata);
+afr_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name,
+ dict_t *xdata);
int32_t
-afr_fgetxattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, const char *name, dict_t *xdata);
-
+afr_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
+ dict_t *xdata);
int
-afr_handle_quota_size (call_frame_t *frame, xlator_t *this);
+afr_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ gf_seek_what_t what, dict_t *xdata);
+int
+afr_handle_quota_size(call_frame_t *frame, xlator_t *this);
#endif /* __INODE_READ_H__ */
diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c
index f712166e44d..1d6e4f3570a 100644
--- a/xlators/cluster/afr/src/afr-inode-write.c
+++ b/xlators/cluster/afr/src/afr-inode-write.c
@@ -8,694 +8,779 @@
cases as published by the Free Software Foundation.
*/
-
-#include <libgen.h>
#include <unistd.h>
-#include <fnmatch.h>
#include <sys/time.h>
#include <stdlib.h>
#include <signal.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
+#include <glusterfs/glusterfs.h>
#include "afr.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "list.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
+#include <glusterfs/dict.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/compat.h>
#include "protocol-common.h"
-
+#include <glusterfs/byte-order.h>
#include "afr-transaction.h"
-
+#include "afr-self-heal.h"
+#include "afr-messages.h"
static void
-__afr_inode_write_finalize (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int read_subvol = 0;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- if (local->inode) {
- if (local->transaction.type == AFR_METADATA_TRANSACTION)
- read_subvol = afr_metadata_subvol_get (local->inode, this,
- NULL, NULL);
- else
- read_subvol = afr_data_subvol_get (local->inode, this,
- NULL, NULL);
- }
-
- local->op_ret = -1;
- local->op_errno = afr_final_errno (local, priv);
-
- for (i = 0; i < priv->child_count; i++) {
- if (!local->replies[i].valid)
- continue;
- if (local->replies[i].op_ret < 0) {
- afr_inode_read_subvol_reset (local->inode, this);
- continue;
- }
-
- /* Order of checks in the compound conditional
- below is important.
-
- - Highest precedence: largest op_ret
- - Next precendence: if all op_rets are equal, read subvol
- - Least precedence: any succeeded subvol
- */
- if ((local->op_ret < local->replies[i].op_ret) ||
- ((local->op_ret == local->replies[i].op_ret) &&
- (i == read_subvol))) {
-
- local->op_ret = local->replies[i].op_ret;
- local->op_errno = local->replies[i].op_errno;
-
- local->cont.inode_wfop.prebuf =
- local->replies[i].prestat;
- local->cont.inode_wfop.postbuf =
- local->replies[i].poststat;
-
- if (local->replies[i].xdata) {
- if (local->xdata_rsp)
- dict_unref (local->xdata_rsp);
- local->xdata_rsp =
- dict_ref (local->replies[i].xdata);
- }
- }
- }
-}
+__afr_inode_write_finalize(call_frame_t *frame, xlator_t *this)
+{
+ int i = 0;
+ int ret = 0;
+ int read_subvol = 0;
+ struct iatt *stbuf = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_read_subvol_args_t args = {
+ 0,
+ };
+
+ local = frame->local;
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, local->inode, out);
+
+ /*This code needs to stay till DHT sends fops on linked
+ * inodes*/
+ if (!inode_is_linked(local->inode)) {
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
+ if (local->replies[i].op_ret == -1)
+ continue;
+ if (!gf_uuid_is_null(local->replies[i].poststat.ia_gfid)) {
+ gf_uuid_copy(args.gfid, local->replies[i].poststat.ia_gfid);
+ args.ia_type = local->replies[i].poststat.ia_type;
+ break;
+ } else {
+ ret = dict_get_bin(local->replies[i].xdata,
+ DHT_IATT_IN_XDATA_KEY, (void **)&stbuf);
+ if (ret)
+ continue;
+ gf_uuid_copy(args.gfid, stbuf->ia_gfid);
+ args.ia_type = stbuf->ia_type;
+ break;
+ }
+ }
+ }
+
+ if (local->transaction.type == AFR_METADATA_TRANSACTION) {
+ read_subvol = afr_metadata_subvol_get(local->inode, this, NULL,
+ local->readable, NULL, &args);
+ } else {
+ read_subvol = afr_data_subvol_get(local->inode, this, NULL,
+ local->readable, NULL, &args);
+ }
+
+ local->op_ret = -1;
+ local->op_errno = afr_final_errno(local, priv);
+ afr_pick_error_xdata(local, priv, local->inode, local->readable, NULL,
+ NULL);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
+ if (local->replies[i].op_ret < 0)
+ continue;
+
+ /* Order of checks in the compound conditional
+ below is important.
+
+ - Highest precedence: largest op_ret
+ - Next precedence: if all op_rets are equal, read subvol
+ - Least precedence: any succeeded subvol
+ */
+ if ((local->op_ret < local->replies[i].op_ret) ||
+ ((local->op_ret == local->replies[i].op_ret) &&
+ (i == read_subvol))) {
+ local->op_ret = local->replies[i].op_ret;
+ local->op_errno = local->replies[i].op_errno;
+
+ local->cont.inode_wfop.prebuf = local->replies[i].prestat;
+ local->cont.inode_wfop.postbuf = local->replies[i].poststat;
+
+ if (local->replies[i].xdata) {
+ if (local->xdata_rsp)
+ dict_unref(local->xdata_rsp);
+ local->xdata_rsp = dict_ref(local->replies[i].xdata);
+ }
+ if (local->replies[i].xattr) {
+ if (local->xattr_rsp)
+ dict_unref(local->xattr_rsp);
+ local->xattr_rsp = dict_ref(local->replies[i].xattr);
+ }
+ }
+ }
+ afr_set_in_flight_sb_status(this, frame, local->inode);
+out:
+ return;
+}
static void
-__afr_inode_write_fill (call_frame_t *frame, xlator_t *this, int child_index,
- int op_ret, int op_errno,
- struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+__afr_inode_write_fill(call_frame_t *frame, xlator_t *this, int child_index,
+ int op_ret, int op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xattr, dict_t *xdata)
{
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- local->replies[child_index].valid = 1;
- local->replies[child_index].op_ret = op_ret;
- local->replies[child_index].op_errno = op_errno;
-
- if (op_ret >= 0) {
- if (prebuf)
- local->replies[child_index].prestat = *prebuf;
- if (postbuf)
- local->replies[child_index].poststat = *postbuf;
- if (xdata)
- local->replies[child_index].xdata = dict_ref (xdata);
- } else {
- afr_transaction_fop_failed (frame, this, child_index);
- }
-
- return;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ local->replies[child_index].valid = 1;
+
+ if (AFR_IS_ARBITER_BRICK(priv, child_index) && op_ret == 1)
+ op_ret = iov_length(local->cont.writev.vector,
+ local->cont.writev.count);
+
+ local->replies[child_index].op_ret = op_ret;
+ local->replies[child_index].op_errno = op_errno;
+ if (xdata)
+ local->replies[child_index].xdata = dict_ref(xdata);
+
+ if (op_ret >= 0) {
+ if (prebuf)
+ local->replies[child_index].prestat = *prebuf;
+ if (postbuf)
+ local->replies[child_index].poststat = *postbuf;
+ if (xattr)
+ local->replies[child_index].xattr = dict_ref(xattr);
+ } else {
+ afr_transaction_fop_failed(frame, this, child_index);
+ }
+
+ return;
}
-
static int
-__afr_inode_write_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+__afr_inode_write_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xattr, dict_t *xdata)
{
- afr_local_t *local = NULL;
- int child_index = (long) cookie;
- int call_count = -1;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- __afr_inode_write_fill (frame, this, child_index, op_ret,
- op_errno, prebuf, postbuf, xdata);
+ afr_local_t *local = NULL;
+ int child_index = (long)cookie;
+ int call_count = -1;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ LOCK(&frame->lock);
+ {
+ __afr_inode_write_fill(frame, this, child_index, op_ret, op_errno,
+ prebuf, postbuf, xattr, xdata);
+ call_count = --local->call_count;
+ }
+ UNLOCK(&frame->lock);
+
+ if (call_count == 0) {
+ __afr_inode_write_finalize(frame, this);
+
+ if (afr_txn_nothing_failed(frame, this)) {
+ /*if it did pre-op, it will do post-op changing ctime*/
+ if (priv->consistent_metadata && afr_needs_changelog_update(local))
+ afr_zero_fill_stat(local);
+ local->transaction.unwind(frame, this);
}
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- __afr_inode_write_finalize (frame, this);
- if (afr_txn_nothing_failed (frame, this))
- local->transaction.unwind (frame, this);
+ afr_transaction_resume(frame, this);
+ }
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return 0;
}
/* {{{ writev */
void
-afr_writev_copy_outvars (call_frame_t *src_frame, call_frame_t *dst_frame)
+afr_writev_copy_outvars(call_frame_t *src_frame, call_frame_t *dst_frame)
{
- afr_local_t *src_local = NULL;
- afr_local_t *dst_local = NULL;
-
- src_local = src_frame->local;
- dst_local = dst_frame->local;
-
- dst_local->op_ret = src_local->op_ret;
- dst_local->op_errno = src_local->op_errno;
- dst_local->cont.inode_wfop.prebuf = src_local->cont.inode_wfop.prebuf;
- dst_local->cont.inode_wfop.postbuf = src_local->cont.inode_wfop.postbuf;
- if (src_local->xdata_rsp)
- dst_local->xdata_rsp = dict_ref (src_local->xdata_rsp);
+ afr_local_t *src_local = NULL;
+ afr_local_t *dst_local = NULL;
+
+ src_local = src_frame->local;
+ dst_local = dst_frame->local;
+
+ dst_local->op_ret = src_local->op_ret;
+ dst_local->op_errno = src_local->op_errno;
+ dst_local->cont.inode_wfop.prebuf = src_local->cont.inode_wfop.prebuf;
+ dst_local->cont.inode_wfop.postbuf = src_local->cont.inode_wfop.postbuf;
+ if (src_local->xdata_rsp)
+ dst_local->xdata_rsp = dict_ref(src_local->xdata_rsp);
}
void
-afr_writev_unwind (call_frame_t *frame, xlator_t *this)
+afr_writev_unwind(call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- local = frame->local;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = this->private;
- AFR_STACK_UNWIND (writev, frame,
- local->op_ret, local->op_errno,
- &local->cont.inode_wfop.prebuf,
- &local->cont.inode_wfop.postbuf,
- local->xdata_rsp);
-}
+ local = frame->local;
+ if (priv->consistent_metadata)
+ afr_zero_fill_stat(local);
+
+ AFR_STACK_UNWIND(writev, frame, local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf, local->xdata_rsp);
+}
int
-afr_transaction_writev_unwind (call_frame_t *frame, xlator_t *this)
+afr_transaction_writev_unwind(call_frame_t *frame, xlator_t *this)
{
- call_frame_t *fop_frame = NULL;
+ call_frame_t *fop_frame = NULL;
- fop_frame = afr_transaction_detach_fop_frame (frame);
+ fop_frame = afr_transaction_detach_fop_frame(frame);
- if (fop_frame) {
- afr_writev_copy_outvars (frame, fop_frame);
- afr_writev_unwind (fop_frame, this);
- }
- return 0;
+ if (fop_frame) {
+ afr_writev_copy_outvars(frame, fop_frame);
+ afr_writev_unwind(fop_frame, this);
+ }
+ return 0;
}
static void
-afr_writev_handle_short_writes (call_frame_t *frame, xlator_t *this)
+afr_writev_handle_short_writes(call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
- /*
- * We already have the best case result of the writev calls staged
- * as the return value. Any writev that returns some value less
- * than the best case is now out of sync, so mark the fop as
- * failed. Note that fops that have returned with errors have
- * already been marked as failed.
- */
- for (i = 0; i < priv->child_count; i++) {
- if ((!local->replies[i].valid) ||
- (local->replies[i].op_ret == -1))
- continue;
-
- if (local->replies[i].op_ret < local->op_ret)
- afr_transaction_fop_failed (frame, this, i);
- }
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+ /*
+ * We already have the best case result of the writev calls staged
+ * as the return value. Any writev that returns some value less
+ * than the best case is now out of sync, so mark the fop as
+ * failed. Note that fops that have returned with errors have
+ * already been marked as failed.
+ */
+ for (i = 0; i < priv->child_count; i++) {
+ if ((!local->replies[i].valid) || (local->replies[i].op_ret == -1))
+ continue;
+
+ if (local->replies[i].op_ret < local->op_ret)
+ afr_transaction_fop_failed(frame, this, i);
+ }
}
-int
-afr_writev_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+void
+afr_inode_write_fill(call_frame_t *frame, xlator_t *this, int child_index,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
struct iatt *postbuf, dict_t *xdata)
{
- afr_local_t * local = NULL;
- call_frame_t *fop_frame = NULL;
- int child_index = (long) cookie;
- int call_count = -1;
- int ret = 0;
- uint32_t open_fd_count = 0;
- uint32_t write_is_append = 0;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- __afr_inode_write_fill (frame, this, child_index, op_ret,
- op_errno, prebuf, postbuf, xdata);
- if (op_ret == -1 || !xdata)
- goto unlock;
-
- write_is_append = 0;
- ret = dict_get_uint32 (xdata, GLUSTERFS_WRITE_IS_APPEND,
- &write_is_append);
- if (ret || !write_is_append)
- local->append_write = _gf_false;
-
- ret = dict_get_uint32 (xdata, GLUSTERFS_OPEN_FD_COUNT,
- &open_fd_count);
- if (ret == -1)
- goto unlock;
- if ((open_fd_count > local->open_fd_count)) {
- local->open_fd_count = open_fd_count;
- local->update_open_fd_count = _gf_true;
- }
+ int ret = 0;
+ afr_local_t *local = frame->local;
+ uint32_t open_fd_count = 0;
+ uint32_t write_is_append = 0;
+ int32_t num_inodelks = 0;
+
+ LOCK(&frame->lock);
+ {
+ __afr_inode_write_fill(frame, this, child_index, op_ret, op_errno,
+ prebuf, postbuf, NULL, xdata);
+ if (op_ret == -1 || !xdata)
+ goto unlock;
+
+ write_is_append = 0;
+ ret = dict_get_uint32(xdata, GLUSTERFS_WRITE_IS_APPEND,
+ &write_is_append);
+ if (ret || !write_is_append)
+ local->append_write = _gf_false;
+
+ ret = dict_get_uint32(xdata, GLUSTERFS_ACTIVE_FD_COUNT, &open_fd_count);
+ if (ret < 0)
+ goto unlock;
+ if (open_fd_count > local->open_fd_count) {
+ local->open_fd_count = open_fd_count;
+ local->update_open_fd_count = _gf_true;
}
-unlock:
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- if (!local->stable_write && !local->append_write)
- /* An appended write removes the necessity to
- fsync() the file. This is because self-heal
- has the logic to check for larger file when
- the xattrs are not reliably pointing at
- a stale file.
- */
- afr_fd_report_unstable_write (this, local->fd);
-
- __afr_inode_write_finalize (frame, this);
-
- afr_writev_handle_short_writes (frame, this);
-
- if (local->update_open_fd_count)
- afr_handle_open_fd_count (frame, this);
-
- if (!afr_txn_nothing_failed (frame, this)) {
- //Don't unwind until post-op is complete
- local->transaction.resume (frame, this);
- } else {
- /*
- * Generally inode-write fops do transaction.unwind then
- * transaction.resume, but writev needs to make sure that
- * delayed post-op frame is placed in fdctx before unwind
- * happens. This prevents the race of flush doing the
- * changelog wakeup first in fuse thread and then this
- * writev placing its delayed post-op frame in fdctx.
- * This helps flush make sure all the delayed post-ops are
- * completed.
- */
-
- fop_frame = afr_transaction_detach_fop_frame (frame);
- afr_writev_copy_outvars (frame, fop_frame);
- local->transaction.resume (frame, this);
- afr_writev_unwind (fop_frame, this);
- }
+
+ ret = dict_get_int32_sizen(xdata, GLUSTERFS_INODELK_COUNT,
+ &num_inodelks);
+ if (ret < 0)
+ goto unlock;
+ if (num_inodelks > local->num_inodelks) {
+ local->num_inodelks = num_inodelks;
+ local->update_num_inodelks = _gf_true;
}
- return 0;
+ }
+unlock:
+ UNLOCK(&frame->lock);
}
-
-int
-afr_writev_wind (call_frame_t *frame, xlator_t *this, int subvol)
+void
+afr_process_post_writev(call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- local = frame->local;
- priv = this->private;
-
- STACK_WIND_COOKIE (frame, afr_writev_wind_cbk, (void *) (long) subvol,
- priv->children[subvol],
- priv->children[subvol]->fops->writev,
- local->fd, local->cont.writev.vector,
- local->cont.writev.count, local->cont.writev.offset,
- local->cont.writev.flags, local->cont.writev.iobref,
- local->xdata_req);
- return 0;
+ afr_local_t *local = NULL;
+ afr_lock_t *lock = NULL;
+
+ local = frame->local;
+
+ if (!local->stable_write && !local->append_write)
+ /* An appended write removes the necessity to
+ fsync() the file. This is because self-heal
+ has the logic to check for larger file when
+ the xattrs are not reliably pointing at
+ a stale file.
+ */
+ afr_fd_report_unstable_write(this, local);
+
+ __afr_inode_write_finalize(frame, this);
+
+ afr_writev_handle_short_writes(frame, this);
+
+ if (local->update_open_fd_count)
+ local->inode_ctx->open_fd_count = local->open_fd_count;
+ if (local->update_num_inodelks &&
+ local->transaction.type == AFR_DATA_TRANSACTION) {
+ lock = &local->inode_ctx->lock[local->transaction.type];
+ lock->num_inodelks = local->num_inodelks;
+ }
}
-
int
-afr_do_writev (call_frame_t *frame, xlator_t *this)
+afr_writev_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- call_frame_t *transaction_frame = NULL;
- afr_local_t *local = NULL;
- int ret = -1;
- int op_errno = ENOMEM;
-
- transaction_frame = copy_frame (frame);
- if (!transaction_frame)
- goto out;
-
- local = frame->local;
- transaction_frame->local = local;
- frame->local = NULL;
-
- if (!AFR_FRAME_INIT (frame, op_errno))
- goto out;
+ call_frame_t *fop_frame = NULL;
+ int child_index = (long)cookie;
+ int call_count = -1;
- local->op = GF_FOP_WRITE;
+ afr_inode_write_fill(frame, this, child_index, op_ret, op_errno, prebuf,
+ postbuf, xdata);
- local->transaction.wind = afr_writev_wind;
- local->transaction.fop = __afr_txn_write_fop;
- local->transaction.done = __afr_txn_write_done;
- local->transaction.unwind = afr_transaction_writev_unwind;
+ call_count = afr_frame_return(frame);
- local->transaction.main_frame = frame;
+ if (call_count == 0) {
+ afr_process_post_writev(frame, this);
- if (local->fd->flags & O_APPEND) {
- /*
- * Backend vfs ignores the 'offset' for append mode fd so
- * locking just the region provided for the writev does not
- * give consistency guarantee. The actual write may happen at a
- * completely different range than the one provided by the
- * offset, len in the fop. So lock the entire file.
- */
- local->transaction.start = 0;
- local->transaction.len = 0;
+ if (!afr_txn_nothing_failed(frame, this)) {
+ // Don't unwind until post-op is complete
+ afr_transaction_resume(frame, this);
} else {
- local->transaction.start = local->cont.writev.offset;
- local->transaction.len = iov_length (local->cont.writev.vector,
- local->cont.writev.count);
+ /*
+ * Generally inode-write fops do transaction.unwind then
+ * transaction.resume, but writev needs to make sure that
+ * delayed post-op frame is placed in fdctx before unwind
+ * happens. This prevents the race of flush doing the
+ * changelog wakeup first in fuse thread and then this
+ * writev placing its delayed post-op frame in fdctx.
+ * This helps flush make sure all the delayed post-ops are
+ * completed.
+ */
+
+ fop_frame = afr_transaction_detach_fop_frame(frame);
+ afr_writev_copy_outvars(frame, fop_frame);
+ afr_transaction_resume(frame, this);
+ afr_writev_unwind(fop_frame, this);
}
-
- ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- return 0;
-out:
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
-
- AFR_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
+ }
+ return 0;
}
+static int
+afr_arbiter_writev_wind(call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = frame->local;
+ afr_private_t *priv = this->private;
+ static char byte = 0xFF;
+ static struct iovec vector = {&byte, 1};
+ int32_t count = 1;
+
+ STACK_WIND_COOKIE(
+ frame, afr_writev_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol], priv->children[subvol]->fops->writev, local->fd,
+ &vector, count, local->cont.writev.offset, local->cont.writev.flags,
+ local->cont.writev.iobref, local->xdata_req);
+
+ return 0;
+}
int
-afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iovec *vector, int32_t count, off_t offset,
- uint32_t flags, struct iobref *iobref, dict_t *xdata)
+afr_writev_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t *local = NULL;
- int op_errno = ENOMEM;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- local = AFR_FRAME_INIT (frame, op_errno);
- if (!local)
- goto out;
+ local = frame->local;
+ priv = this->private;
- local->cont.writev.vector = iov_dup (vector, count);
- if (!local->cont.writev.vector)
- goto out;
- local->cont.writev.count = count;
- local->cont.writev.offset = offset;
- local->cont.writev.flags = flags;
- local->cont.writev.iobref = iobref_ref (iobref);
+ if (AFR_IS_ARBITER_BRICK(priv, subvol)) {
+ afr_arbiter_writev_wind(frame, this, subvol);
+ return 0;
+ }
+
+ STACK_WIND_COOKIE(frame, afr_writev_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->writev, local->fd,
+ local->cont.writev.vector, local->cont.writev.count,
+ local->cont.writev.offset, local->cont.writev.flags,
+ local->cont.writev.iobref, local->xdata_req);
+ return 0;
+}
- if (xdata)
- local->xdata_req = dict_copy_with_ref (xdata, NULL);
- else
- local->xdata_req = dict_new ();
+int
+afr_do_writev(call_frame_t *frame, xlator_t *this)
+{
+ call_frame_t *transaction_frame = NULL;
+ afr_local_t *local = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
- if (!local->xdata_req)
- goto out;
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
- local->fd = fd_ref (fd);
- local->inode = inode_ref (fd->inode);
+ local = frame->local;
+ transaction_frame->local = local;
+ frame->local = NULL;
- if (dict_set_uint32 (local->xdata_req, GLUSTERFS_OPEN_FD_COUNT, 4)) {
- op_errno = ENOMEM;
- goto out;
- }
+ if (!AFR_FRAME_INIT(frame, op_errno))
+ goto out;
- if (dict_set_uint32 (local->xdata_req, GLUSTERFS_WRITE_IS_APPEND, 4)) {
- op_errno = ENOMEM;
- goto out;
- }
+ local->op = GF_FOP_WRITE;
- /* Set append_write to be true speculatively. If on any
- server it turns not be true, we unset it in the
- callback.
- */
- local->append_write = _gf_true;
+ local->transaction.wind = afr_writev_wind;
+ local->transaction.unwind = afr_transaction_writev_unwind;
- /* detect here, but set it in writev_wind_cbk *after* the unstable
- write is performed
- */
- local->stable_write = !!((fd->flags|flags)&(O_SYNC|O_DSYNC));
+ local->transaction.main_frame = frame;
- afr_fix_open (fd, this);
+ if (local->fd->flags & O_APPEND) {
+ /*
+ * Backend vfs ignores the 'offset' for append mode fd so
+ * locking just the region provided for the writev does not
+ * give consistency guarantee. The actual write may happen at a
+ * completely different range than the one provided by the
+ * offset, len in the fop. So lock the entire file.
+ */
+ local->transaction.start = 0;
+ local->transaction.len = 0;
+ } else {
+ local->transaction.start = local->cont.writev.offset;
+ local->transaction.len = iov_length(local->cont.writev.vector,
+ local->cont.writev.count);
+ }
+
+ ret = afr_transaction(transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ return 0;
+out:
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- afr_do_writev (frame, this);
+ AFR_STACK_UNWIND(writev, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
- return 0;
+int
+afr_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
+ int32_t count, off_t offset, uint32_t flags, struct iobref *iobref,
+ dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int op_errno = ENOMEM;
+ int ret = -1;
+
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
+
+ local->cont.writev.vector = iov_dup(vector, count);
+ if (!local->cont.writev.vector)
+ goto out;
+ local->cont.writev.count = count;
+ local->cont.writev.offset = offset;
+ local->cont.writev.flags = flags;
+ local->cont.writev.iobref = iobref_ref(iobref);
+
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
+
+ if (!local->xdata_req)
+ goto out;
+
+ local->fd = fd_ref(fd);
+ ret = afr_set_inode_local(this, local, fd->inode);
+ if (ret)
+ goto out;
+
+ if (dict_set_uint32(local->xdata_req, GLUSTERFS_ACTIVE_FD_COUNT, 4)) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ if (dict_set_str_sizen(local->xdata_req, GLUSTERFS_INODELK_DOM_COUNT,
+ this->name)) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ if (dict_set_uint32(local->xdata_req, GLUSTERFS_WRITE_IS_APPEND, 4)) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ /* Set append_write to be true speculatively. If on any
+ server it turns not be true, we unset it in the
+ callback.
+ */
+ local->append_write = _gf_true;
+
+ /* detect here, but set it in writev_wind_cbk *after* the unstable
+ write is performed
+ */
+ local->stable_write = !!((fd->flags | flags) & (O_SYNC | O_DSYNC));
+
+ afr_fix_open(fd, this);
+
+ afr_do_writev(frame, this);
+
+ return 0;
out:
- AFR_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL);
+ AFR_STACK_UNWIND(writev, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
+ return 0;
}
-
/* }}} */
/* {{{ truncate */
int
-afr_truncate_unwind (call_frame_t *frame, xlator_t *this)
+afr_truncate_unwind(call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
- local = frame->local;
+ local = frame->local;
- main_frame = afr_transaction_detach_fop_frame (frame);
- if (!main_frame)
- return 0;
-
- AFR_STACK_UNWIND (truncate, main_frame, local->op_ret, local->op_errno,
- &local->cont.inode_wfop.prebuf,
- &local->cont.inode_wfop.postbuf, local->xdata_rsp);
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
-}
+ AFR_STACK_UNWIND(truncate, main_frame, local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf, local->xdata_rsp);
+ return 0;
+}
int
-afr_truncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+afr_truncate_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- afr_local_t *local = NULL;
+ afr_local_t *local = NULL;
- local = frame->local;
+ local = frame->local;
- if (op_ret == 0 && prebuf->ia_size != postbuf->ia_size)
- local->stable_write = _gf_false;
+ if (op_ret == 0 && prebuf->ia_size != postbuf->ia_size)
+ local->stable_write = _gf_false;
- return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno,
- prebuf, postbuf, xdata);
+ return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, prebuf,
+ postbuf, NULL, xdata);
}
-
int
-afr_truncate_wind (call_frame_t *frame, xlator_t *this, int subvol)
+afr_truncate_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- local = frame->local;
- priv = this->private;
+ local = frame->local;
+ priv = this->private;
- STACK_WIND_COOKIE (frame, afr_truncate_wind_cbk, (void *) (long) subvol,
- priv->children[subvol],
- priv->children[subvol]->fops->truncate,
- &local->loc, local->cont.truncate.offset,
- local->xdata_req);
- return 0;
+ STACK_WIND_COOKIE(frame, afr_truncate_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->truncate, &local->loc,
+ local->cont.truncate.offset, local->xdata_req);
+ return 0;
}
-
int
-afr_truncate (call_frame_t *frame, xlator_t *this,
- loc_t *loc, off_t offset, dict_t *xdata)
+afr_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
{
- afr_local_t * local = NULL;
- call_frame_t *transaction_frame = NULL;
- int ret = -1;
- int op_errno = ENOMEM;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
- transaction_frame = copy_frame (frame);
- if (!transaction_frame)
- goto out;
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
- local = AFR_FRAME_INIT (transaction_frame, op_errno);
- if (!local)
- goto out;
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
- local->cont.truncate.offset = offset;
- if (xdata)
- local->xdata_req = dict_copy_with_ref (xdata, NULL);
- else
- local->xdata_req = dict_new ();
+ local->cont.truncate.offset = offset;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
- if (!local->xdata_req)
- goto out;
+ if (!local->xdata_req)
+ goto out;
- local->transaction.wind = afr_truncate_wind;
- local->transaction.fop = __afr_txn_write_fop;
- local->transaction.done = __afr_txn_write_done;
- local->transaction.unwind = afr_truncate_unwind;
+ local->transaction.wind = afr_truncate_wind;
+ local->transaction.unwind = afr_truncate_unwind;
- loc_copy (&local->loc, loc);
- local->inode = inode_ref (loc->inode);
+ loc_copy(&local->loc, loc);
+ ret = afr_set_inode_local(this, local, loc->inode);
+ if (ret)
+ goto out;
- local->op = GF_FOP_TRUNCATE;
+ local->op = GF_FOP_TRUNCATE;
- local->transaction.main_frame = frame;
- local->transaction.start = offset;
- local->transaction.len = 0;
+ local->transaction.main_frame = frame;
+ local->transaction.start = offset;
+ local->transaction.len = 0;
- /* Set it true speculatively, will get reset in afr_truncate_wind_cbk
- if truncate was not a NOP */
- local->stable_write = _gf_true;
+ /* Set it true speculatively, will get reset in afr_truncate_wind_cbk
+ if truncate was not a NOP */
+ local->stable_write = _gf_true;
- ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ ret = afr_transaction(transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- return 0;
+ return 0;
out:
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- AFR_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
+ AFR_STACK_UNWIND(truncate, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
}
-
/* }}} */
/* {{{ ftruncate */
-
int
-afr_ftruncate_unwind (call_frame_t *frame, xlator_t *this)
+afr_ftruncate_unwind(call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- call_frame_t *main_frame = NULL;
-
- local = frame->local;
+ afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
- main_frame = afr_transaction_detach_fop_frame (frame);
- if (!main_frame)
- return 0;
+ local = frame->local;
- AFR_STACK_UNWIND (ftruncate, main_frame, local->op_ret, local->op_errno,
- &local->cont.inode_wfop.prebuf,
- &local->cont.inode_wfop.postbuf, local->xdata_rsp);
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
-}
+ AFR_STACK_UNWIND(ftruncate, main_frame, local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf, local->xdata_rsp);
+ return 0;
+}
int
-afr_ftruncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+afr_ftruncate_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- afr_local_t *local = NULL;
+ afr_local_t *local = NULL;
- local = frame->local;
+ local = frame->local;
- if (op_ret == 0 && prebuf->ia_size != postbuf->ia_size)
- local->stable_write = _gf_false;
+ if (op_ret == 0 && prebuf->ia_size != postbuf->ia_size)
+ local->stable_write = _gf_false;
- return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno,
- prebuf, postbuf, xdata);
+ return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, prebuf,
+ postbuf, NULL, xdata);
}
-
int
-afr_ftruncate_wind (call_frame_t *frame, xlator_t *this, int subvol)
+afr_ftruncate_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- local = frame->local;
- priv = this->private;
+ local = frame->local;
+ priv = this->private;
- STACK_WIND_COOKIE (frame, afr_ftruncate_wind_cbk, (void *) (long) subvol,
- priv->children[subvol],
- priv->children[subvol]->fops->ftruncate,
- local->fd, local->cont.ftruncate.offset,
- local->xdata_req);
- return 0;
+ STACK_WIND_COOKIE(frame, afr_ftruncate_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->ftruncate, local->fd,
+ local->cont.ftruncate.offset, local->xdata_req);
+ return 0;
}
-
int
-afr_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
- dict_t *xdata)
+afr_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
{
- afr_local_t *local = NULL;
- call_frame_t *transaction_frame = NULL;
- int ret = -1;
- int op_errno = ENOMEM;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
- transaction_frame = copy_frame (frame);
- if (!transaction_frame)
- goto out;
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
- local = AFR_FRAME_INIT (transaction_frame, op_errno);
- if (!local)
- goto out;
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
- local->cont.ftruncate.offset = offset;
- if (xdata)
- local->xdata_req = dict_copy_with_ref (xdata, NULL);
- else
- local->xdata_req = dict_new ();
+ local->cont.ftruncate.offset = offset;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
- if (!local->xdata_req)
- goto out;
+ if (!local->xdata_req)
+ goto out;
- local->fd = fd_ref (fd);
- local->inode = inode_ref (fd->inode);
+ local->fd = fd_ref(fd);
+ ret = afr_set_inode_local(this, local, fd->inode);
+ if (ret)
+ goto out;
- local->op = GF_FOP_FTRUNCATE;
+ local->op = GF_FOP_FTRUNCATE;
- local->transaction.wind = afr_ftruncate_wind;
- local->transaction.fop = __afr_txn_write_fop;
- local->transaction.done = __afr_txn_write_done;
- local->transaction.unwind = afr_ftruncate_unwind;
+ local->transaction.wind = afr_ftruncate_wind;
+ local->transaction.unwind = afr_ftruncate_unwind;
- local->transaction.main_frame = frame;
+ local->transaction.main_frame = frame;
- local->transaction.start = local->cont.ftruncate.offset;
- local->transaction.len = 0;
+ local->transaction.start = local->cont.ftruncate.offset;
+ local->transaction.len = 0;
- afr_fix_open (fd, this);
+ afr_fix_open(fd, this);
- /* Set it true speculatively, will get reset in afr_ftruncate_wind_cbk
- if truncate was not a NOP */
- local->stable_write = _gf_true;
+ /* Set it true speculatively, will get reset in afr_ftruncate_wind_cbk
+ if truncate was not a NOP */
+ local->stable_write = _gf_true;
- ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ ret = afr_transaction(transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- return 0;
+ return 0;
out:
- AFR_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL, NULL);
+ AFR_STACK_UNWIND(ftruncate, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
+ return 0;
}
/* }}} */
@@ -703,1133 +788,1778 @@ out:
/* {{{ setattr */
int
-afr_setattr_unwind (call_frame_t *frame, xlator_t *this)
+afr_setattr_unwind(call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- call_frame_t *main_frame = NULL;
-
- local = frame->local;
+ afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
- main_frame = afr_transaction_detach_fop_frame (frame);
- if (!main_frame)
- return 0;
+ local = frame->local;
- AFR_STACK_UNWIND (setattr, main_frame, local->op_ret, local->op_errno,
- &local->cont.inode_wfop.prebuf,
- &local->cont.inode_wfop.postbuf,
- local->xdata_rsp);
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
+
+ AFR_STACK_UNWIND(setattr, main_frame, local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf, local->xdata_rsp);
+ return 0;
}
+int
+afr_setattr_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *preop,
+ struct iatt *postop, dict_t *xdata)
+{
+ return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, preop,
+ postop, NULL, xdata);
+}
+
+int
+afr_setattr_wind(call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ STACK_WIND_COOKIE(frame, afr_setattr_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->setattr, &local->loc,
+ &local->cont.setattr.in_buf, local->cont.setattr.valid,
+ local->xdata_req);
+ return 0;
+}
int
-afr_setattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- struct iatt *preop, struct iatt *postop, dict_t *xdata)
+afr_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *buf,
+ int32_t valid, dict_t *xdata)
{
- return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno,
- preop, postop, xdata);
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
+
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
+
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
+
+ local->cont.setattr.in_buf = *buf;
+ local->cont.setattr.valid = valid;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
+
+ if (!local->xdata_req)
+ goto out;
+
+ local->transaction.wind = afr_setattr_wind;
+ local->transaction.unwind = afr_setattr_unwind;
+
+ loc_copy(&local->loc, loc);
+ ret = afr_set_inode_local(this, local, loc->inode);
+ if (ret)
+ goto out;
+
+ local->op = GF_FOP_SETATTR;
+
+ local->transaction.main_frame = frame;
+ local->transaction.start = LLONG_MAX - 1;
+ local->transaction.len = 0;
+
+ ret = afr_transaction(transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ return 0;
+out:
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
+
+ AFR_STACK_UNWIND(setattr, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
}
+/* {{{ fsetattr */
int
-afr_setattr_wind (call_frame_t *frame, xlator_t *this, int subvol)
+afr_fsetattr_unwind(call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
- local = frame->local;
- priv = this->private;
+ local = frame->local;
- STACK_WIND_COOKIE (frame, afr_setattr_wind_cbk, (void *) (long) subvol,
- priv->children[subvol],
- priv->children[subvol]->fops->setattr,
- &local->loc, &local->cont.setattr.in_buf,
- local->cont.setattr.valid, local->xdata_req);
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
+
+ AFR_STACK_UNWIND(fsetattr, main_frame, local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf, local->xdata_rsp);
+ return 0;
}
+int
+afr_fsetattr_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preop,
+ struct iatt *postop, dict_t *xdata)
+{
+ return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, preop,
+ postop, NULL, xdata);
+}
int
-afr_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *buf,
- int32_t valid, dict_t *xdata)
+afr_fsetattr_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t *local = NULL;
- call_frame_t *transaction_frame = NULL;
- int ret = -1;
- int op_errno = ENOMEM;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ STACK_WIND_COOKIE(frame, afr_fsetattr_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->fsetattr, local->fd,
+ &local->cont.fsetattr.in_buf, local->cont.fsetattr.valid,
+ local->xdata_req);
+ return 0;
+}
- transaction_frame = copy_frame (frame);
- if (!transaction_frame)
- goto out;
+int
+afr_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *buf,
+ int32_t valid, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
- local = AFR_FRAME_INIT (transaction_frame, op_errno);
- if (!local)
- goto out;
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
- local->cont.setattr.in_buf = *buf;
- local->cont.setattr.valid = valid;
- if (xdata)
- local->xdata_req = dict_copy_with_ref (xdata, NULL);
- else
- local->xdata_req = dict_new ();
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
- if (!local->xdata_req)
- goto out;
+ local->cont.fsetattr.in_buf = *buf;
+ local->cont.fsetattr.valid = valid;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
- local->transaction.wind = afr_setattr_wind;
- local->transaction.fop = __afr_txn_write_fop;
- local->transaction.done = __afr_txn_write_done;
- local->transaction.unwind = afr_setattr_unwind;
+ if (!local->xdata_req)
+ goto out;
- loc_copy (&local->loc, loc);
- local->inode = inode_ref (loc->inode);
+ local->transaction.wind = afr_fsetattr_wind;
+ local->transaction.unwind = afr_fsetattr_unwind;
- local->op = GF_FOP_SETATTR;
+ local->fd = fd_ref(fd);
+ ret = afr_set_inode_local(this, local, fd->inode);
+ if (ret)
+ goto out;
- local->transaction.main_frame = frame;
- local->transaction.start = LLONG_MAX - 1;
- local->transaction.len = 0;
+ local->op = GF_FOP_FSETATTR;
- ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ afr_fix_open(fd, this);
- return 0;
+ local->transaction.main_frame = frame;
+ local->transaction.start = LLONG_MAX - 1;
+ local->transaction.len = 0;
+
+ ret = afr_transaction(transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ return 0;
out:
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- AFR_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
+ AFR_STACK_UNWIND(fsetattr, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
}
-/* {{{ fsetattr */
+/* {{{ setxattr */
int
-afr_fsetattr_unwind (call_frame_t *frame, xlator_t *this)
+afr_setxattr_unwind(call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- call_frame_t *main_frame = NULL;
-
- local = frame->local;
+ afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
- main_frame = afr_transaction_detach_fop_frame (frame);
- if (!main_frame)
- return 0;
+ local = frame->local;
- AFR_STACK_UNWIND (fsetattr, main_frame, local->op_ret, local->op_errno,
- &local->cont.inode_wfop.prebuf,
- &local->cont.inode_wfop.postbuf, local->xdata_rsp);
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
-}
+ AFR_STACK_UNWIND(setxattr, main_frame, local->op_ret, local->op_errno,
+ local->xdata_rsp);
+ return 0;
+}
int
-afr_fsetattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop, dict_t *xdata)
+afr_setxattr_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno,
- preop, postop, xdata);
+ return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, NULL,
+ NULL, NULL, xdata);
}
-
int
-afr_fsetattr_wind (call_frame_t *frame, xlator_t *this, int subvol)
+afr_setxattr_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- local = frame->local;
- priv = this->private;
-
- STACK_WIND_COOKIE (frame, afr_fsetattr_wind_cbk, (void *) (long) subvol,
- priv->children[subvol],
- priv->children[subvol]->fops->fsetattr,
- local->fd, &local->cont.fsetattr.in_buf,
- local->cont.fsetattr.valid, local->xdata_req);
- return 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ STACK_WIND_COOKIE(frame, afr_setxattr_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->setxattr, &local->loc,
+ local->cont.setxattr.dict, local->cont.setxattr.flags,
+ local->xdata_req);
+ return 0;
}
-
int
-afr_fsetattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, struct iatt *buf, int32_t valid, dict_t *xdata)
+afr_emptyb_set_pending_changelog_cbk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ dict_t *xattr, dict_t *xdata)
+
{
- afr_local_t *local = NULL;
- call_frame_t *transaction_frame = NULL;
- int ret = -1;
- int op_errno = ENOMEM;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i, ret = 0;
+ char *op_type = NULL;
- transaction_frame = copy_frame (frame);
- if (!transaction_frame)
- goto out;
+ local = frame->local;
+ priv = this->private;
+ i = (long)cookie;
- local = AFR_FRAME_INIT (transaction_frame, op_errno);
- if (!local)
- goto out;
+ local->replies[i].valid = 1;
+ local->replies[i].op_ret = op_ret;
+ local->replies[i].op_errno = op_errno;
- local->cont.fsetattr.in_buf = *buf;
- local->cont.fsetattr.valid = valid;
- if (xdata)
- local->xdata_req = dict_copy_with_ref (xdata, NULL);
- else
- local->xdata_req = dict_new ();
+ ret = dict_get_str_sizen(local->xdata_req, "replicate-brick-op", &op_type);
+ if (ret)
+ goto out;
- if (!local->xdata_req)
- goto out;
+ gf_smsg(this->name, op_ret ? GF_LOG_ERROR : GF_LOG_INFO,
+ op_ret ? op_errno : 0, AFR_MSG_SET_PEND_XATTR, "name=%s",
+ priv->children[i]->name, "op_ret=%s",
+ op_ret ? "failed" : "succeeded", NULL);
- local->transaction.wind = afr_fsetattr_wind;
- local->transaction.fop = __afr_txn_write_fop;
- local->transaction.done = __afr_txn_write_done;
- local->transaction.unwind = afr_fsetattr_unwind;
+out:
+ syncbarrier_wake(&local->barrier);
+ return 0;
+}
- local->fd = fd_ref (fd);
- local->inode = inode_ref (fd->inode);
+int
+afr_emptyb_set_pending_changelog(call_frame_t *frame, xlator_t *this,
+ unsigned char *locked_nodes)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int ret = 0, i = 0;
- local->op = GF_FOP_FSETATTR;
+ local = frame->local;
+ priv = this->private;
- afr_fix_open (fd, this);
+ AFR_ONLIST(locked_nodes, frame, afr_emptyb_set_pending_changelog_cbk,
+ xattrop, &local->loc, GF_XATTROP_ADD_ARRAY, local->xattr_req,
+ NULL);
- local->transaction.main_frame = frame;
- local->transaction.start = LLONG_MAX - 1;
- local->transaction.len = 0;
+ /* It is sufficient if xattrop was successful on one child */
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
- ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
+ if (local->replies[i].op_ret == 0) {
+ ret = 0;
+ goto out;
+ } else {
+ ret = afr_higher_errno(ret, local->replies[i].op_errno);
}
-
- return 0;
+ }
out:
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
-
- AFR_STACK_UNWIND (fsetattr, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
+ return -ret;
}
+static int
+_afr_handle_empty_brick_type(xlator_t *this, call_frame_t *frame, loc_t *loc,
+ int empty_index, afr_transaction_type type,
+ char *op_type, const int op_type_len)
+{
+ int count = 0;
+ int ret = -ENOMEM;
+ int idx = -1;
+ int d_idx = -1;
+ unsigned char *locked_nodes = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ locked_nodes = alloca0(priv->child_count);
+
+ idx = afr_index_for_transaction_type(type);
+ d_idx = afr_index_for_transaction_type(AFR_DATA_TRANSACTION);
+
+ local->pending = afr_matrix_create(priv->child_count, AFR_NUM_CHANGE_LOGS);
+ if (!local->pending)
+ goto out;
+
+ local->pending[empty_index][idx] = hton32(1);
+
+ if ((priv->esh_granular) && (type == AFR_ENTRY_TRANSACTION))
+ local->pending[empty_index][d_idx] = hton32(1);
+
+ local->xdata_req = dict_new();
+ if (!local->xdata_req)
+ goto out;
+
+ ret = dict_set_nstrn(local->xdata_req, "replicate-brick-op",
+ SLEN("replicate-brick-op"), op_type, op_type_len);
+ if (ret)
+ goto out;
+
+ local->xattr_req = dict_new();
+ if (!local->xattr_req)
+ goto out;
+
+ ret = afr_set_pending_dict(priv, local->xattr_req, local->pending);
+ if (ret < 0)
+ goto out;
+
+ if (AFR_ENTRY_TRANSACTION == type) {
+ count = afr_selfheal_entrylk(frame, this, loc->inode, this->name, NULL,
+ locked_nodes);
+ } else {
+ count = afr_selfheal_inodelk(frame, this, loc->inode, this->name,
+ LLONG_MAX - 1, 0, locked_nodes);
+ }
+
+ if (!count) {
+ gf_smsg(this->name, GF_LOG_ERROR, EAGAIN, AFR_MSG_REPLACE_BRICK_STATUS,
+ NULL);
+ ret = -EAGAIN;
+ goto unlock;
+ }
+
+ ret = afr_emptyb_set_pending_changelog(frame, this, locked_nodes);
+ if (ret)
+ goto unlock;
+ ret = 0;
+unlock:
+ if (AFR_ENTRY_TRANSACTION == type) {
+ afr_selfheal_unentrylk(frame, this, loc->inode, this->name, NULL,
+ locked_nodes, NULL);
+ } else {
+ afr_selfheal_uninodelk(frame, this, loc->inode, this->name,
+ LLONG_MAX - 1, 0, locked_nodes);
+ }
+out:
+ return ret;
+}
-/* {{{ setxattr */
+void
+afr_brick_args_cleanup(void *opaque)
+{
+ afr_empty_brick_args_t *data = NULL;
+ data = opaque;
+ loc_wipe(&data->loc);
+ GF_FREE(data);
+}
int
-afr_setxattr_unwind (call_frame_t *frame, xlator_t *this)
+_afr_handle_empty_brick_cbk(int ret, call_frame_t *frame, void *opaque)
{
- afr_local_t * local = NULL;
- call_frame_t *main_frame = NULL;
-
- local = frame->local;
-
- main_frame = afr_transaction_detach_fop_frame (frame);
- if (!main_frame)
- return 0;
-
- AFR_STACK_UNWIND (setxattr, main_frame, local->op_ret, local->op_errno,
- local->xdata_rsp);
- return 0;
+ afr_brick_args_cleanup(opaque);
+ return 0;
}
-
int
-afr_setxattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+_afr_handle_empty_brick(void *opaque)
{
- return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno,
- NULL, NULL, xdata);
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int empty_index = -1;
+ int ret = -1;
+ int op_errno = ENOMEM;
+ call_frame_t *frame = NULL;
+ xlator_t *this = NULL;
+ char *op_type = NULL;
+ int op_type_len = 0;
+ afr_empty_brick_args_t *data = NULL;
+ call_frame_t *op_frame = NULL;
+
+ data = opaque;
+ frame = data->frame;
+ empty_index = data->empty_index;
+ if (!data->op_type)
+ goto out;
+
+ op_frame = copy_frame(frame);
+ if (!op_frame) {
+ ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ op_type = data->op_type;
+ op_type_len = strlen(op_type);
+ this = op_frame->this;
+ priv = this->private;
+
+ afr_set_lk_owner(op_frame, this, op_frame->root);
+ local = AFR_FRAME_INIT(op_frame, op_errno);
+ if (!local)
+ goto out;
+
+ loc_copy(&local->loc, &data->loc);
+
+ gf_smsg(this->name, GF_LOG_INFO, 0, AFR_MSG_NEW_BRICK, "name=%s",
+ priv->children[empty_index]->name, NULL);
+
+ ret = _afr_handle_empty_brick_type(this, op_frame, &local->loc, empty_index,
+ AFR_METADATA_TRANSACTION, op_type,
+ op_type_len);
+ if (ret) {
+ op_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+
+ dict_unref(local->xdata_req);
+ dict_unref(local->xattr_req);
+ afr_matrix_cleanup(local->pending, priv->child_count);
+ local->pending = NULL;
+ local->xattr_req = NULL;
+ local->xdata_req = NULL;
+
+ ret = _afr_handle_empty_brick_type(this, op_frame, &local->loc, empty_index,
+ AFR_ENTRY_TRANSACTION, op_type,
+ op_type_len);
+ if (ret) {
+ op_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+ ret = 0;
+out:
+ if (op_frame) {
+ AFR_STACK_DESTROY(op_frame);
+ }
+ AFR_STACK_UNWIND(setxattr, frame, ret, op_errno, NULL);
+ return 0;
}
-
int
-afr_setxattr_wind (call_frame_t *frame, xlator_t *this, int subvol)
+afr_split_brain_resolve_do(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ char *data)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- local = frame->local;
- priv = this->private;
-
- STACK_WIND_COOKIE (frame, afr_setxattr_wind_cbk, (void *) (long) subvol,
- priv->children[subvol],
- priv->children[subvol]->fops->setxattr,
- &local->loc, local->cont.setxattr.dict,
- local->cont.setxattr.flags, local->xdata_req);
- return 0;
+ afr_local_t *local = NULL;
+ int ret = -1;
+ int op_errno = EINVAL;
+
+ local = frame->local;
+ local->xdata_req = dict_new();
+
+ if (!local->xdata_req) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ ret = dict_set_int32_sizen(local->xdata_req, "heal-op",
+ GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK);
+ if (ret) {
+ op_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_str_sizen(local->xdata_req, "child-name", data);
+ if (ret) {
+ op_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+ /* set spb choice to -1 whether heal succeeds or not:
+ * If heal succeeds : spb-choice should be set to -1 as
+ * it is no longer valid; file is not
+ * in split-brain anymore.
+ * If heal doesn't succeed:
+ * spb-choice should be set to -1
+ * otherwise reads will be served
+ * from spb-choice which is misleading.
+ */
+ ret = afr_inode_split_brain_choice_set(loc->inode, this, -1);
+ if (ret)
+ gf_smsg(this->name, GF_LOG_WARNING, 0, AFR_MSG_SPLIT_BRAIN_SET_FAILED,
+ NULL);
+ afr_heal_splitbrain_file(frame, this, loc);
+ ret = 0;
+out:
+ if (ret < 0)
+ AFR_STACK_UNWIND(setxattr, frame, -1, op_errno, NULL);
+ return 0;
}
int
-afr_split_brain_resolve_do (call_frame_t *frame, xlator_t *this, loc_t *loc,
- char *data)
+afr_get_split_brain_child_index(xlator_t *this, void *value, size_t len)
{
- afr_local_t *local = NULL;
- int ret = -1;
- int op_errno = EINVAL;
+ int spb_child_index = -1;
+ char *spb_child_str = NULL;
- local = AFR_FRAME_INIT (frame, op_errno);
- if (!local)
- goto out;
+ spb_child_str = alloca0(len + 1);
+ memcpy(spb_child_str, value, len);
+
+ if (!strcmp(spb_child_str, "none"))
+ return -2;
- local->op = GF_FOP_SETXATTR;
+ spb_child_index = afr_get_child_index_from_name(this, spb_child_str);
+ if (spb_child_index < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_SUBVOL,
+ "subvol=%s", spb_child_str, NULL);
+ }
+ return spb_child_index;
+}
- local->xdata_req = dict_new ();
+int
+afr_can_set_split_brain_choice(void *opaque)
+{
+ afr_spbc_timeout_t *data = opaque;
+ call_frame_t *frame = NULL;
+ xlator_t *this = NULL;
+ loc_t *loc = NULL;
+ int ret = -1;
+
+ frame = data->frame;
+ loc = data->loc;
+ this = frame->this;
+
+ ret = afr_is_split_brain(frame, this, loc->inode, loc->gfid, &data->d_spb,
+ &data->m_spb);
+
+ if (ret)
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ AFR_MSG_SPLIT_BRAIN_DETERMINE_FAILED, "gfid=%s",
+ uuid_utoa(loc->gfid), NULL);
+ return ret;
+}
- if (!local->xdata_req) {
- op_errno = ENOMEM;
+int
+afr_handle_split_brain_commands(xlator_t *this, call_frame_t *frame, loc_t *loc,
+ dict_t *dict)
+{
+ void *choice_value = NULL;
+ void *resolve_value = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_spbc_timeout_t *data = NULL;
+ int len = 0;
+ int spb_child_index = -1;
+ int ret = -1;
+ int op_errno = EINVAL;
+
+ priv = this->private;
+
+ ret = dict_get_ptr_and_len(dict, GF_AFR_SBRAIN_CHOICE, &choice_value, &len);
+ ret = dict_get_ptr_and_len(dict, GF_AFR_SBRAIN_RESOLVE, &resolve_value,
+ &len);
+ if (!choice_value && !resolve_value) {
+ ret = -1;
+ goto out;
+ }
+
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local) {
+ ret = 1;
+ goto out;
+ }
+
+ local->op = GF_FOP_SETXATTR;
+
+ if (choice_value) {
+ spb_child_index = afr_get_split_brain_child_index(this, choice_value,
+ len);
+ if (spb_child_index < 0) {
+ /* Case where value was "none" */
+ if (spb_child_index == -2)
+ spb_child_index = -1;
+ else {
+ ret = 1;
+ op_errno = EINVAL;
goto out;
+ }
}
- ret = dict_set_int32 (local->xdata_req, "heal-op",
- GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK);
- if (ret) {
- op_errno = -ret;
- ret = -1;
- goto out;
+ data = GF_CALLOC(1, sizeof(*data), gf_afr_mt_spbc_timeout_t);
+ if (!data) {
+ ret = 1;
+ goto out;
}
- ret = dict_set_str (local->xdata_req, "child-name", data);
+ data->spb_child_index = spb_child_index;
+ data->frame = frame;
+ loc_copy(&local->loc, loc);
+ data->loc = &local->loc;
+ ret = synctask_new(this->ctx->env, afr_can_set_split_brain_choice,
+ afr_set_split_brain_choice, NULL, data);
if (ret) {
- op_errno = -ret;
- ret = -1;
- goto out;
+ gf_smsg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN_STATUS,
+ "name=%s", loc->name, NULL);
+ ret = 1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+ ret = 0;
+ goto out;
+ }
+
+ if (resolve_value) {
+ spb_child_index = afr_get_split_brain_child_index(this, resolve_value,
+ len);
+ if (spb_child_index < 0) {
+ ret = 1;
+ goto out;
}
- afr_heal_splitbrain_file (frame, this, loc);
+
+ afr_split_brain_resolve_do(frame, this, loc,
+ priv->children[spb_child_index]->name);
+ ret = 0;
+ }
out:
- if (ret < 0)
- AFR_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
- return 0;
+ /* key was correct but value was invalid when ret == 1 */
+ if (ret == 1) {
+ AFR_STACK_UNWIND(setxattr, frame, -1, op_errno, NULL);
+ if (data)
+ GF_FREE(data);
+ ret = 0;
+ }
+ return ret;
}
int
-afr_set_split_brain_choice (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int spb_choice)
+afr_handle_spb_choice_timeout(xlator_t *this, call_frame_t *frame, dict_t *dict)
{
- int ret = -1;
- int op_errno = ENOMEM;
- afr_private_t *priv = NULL;
+ int ret = -1;
+ int op_errno = 0;
+ uint64_t timeout = 0;
+ afr_private_t *priv = NULL;
- priv = this->private;
+ priv = this->private;
- ret = afr_inode_split_brain_choice_set (loc->inode, this, spb_choice);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Failed to set"
- "split-brain choice as %s for %s",
- priv->children[spb_choice]->name,
- loc->name);
- }
- inode_invalidate (loc->inode);
- AFR_STACK_UNWIND (setxattr, frame, ret, op_errno, NULL);
- return ret;
+ ret = dict_get_uint64(dict, GF_AFR_SPB_CHOICE_TIMEOUT, &timeout);
+ if (!ret) {
+ priv->spb_choice_timeout = timeout * 60;
+ AFR_STACK_UNWIND(setxattr, frame, ret, op_errno, NULL);
+ }
+
+ return ret;
}
int
-afr_get_split_brain_child_index (xlator_t *this, void *value, size_t len)
+afr_handle_empty_brick(xlator_t *this, call_frame_t *frame, loc_t *loc,
+ dict_t *dict)
{
- int spb_child_index = -1;
- char *spb_child_str = NULL;
+ int ret = -1;
+ int ab_ret = -1;
+ int empty_index = -1;
+ int op_errno = EPERM;
+ char *empty_brick = NULL;
+ char *op_type = NULL;
+ afr_empty_brick_args_t *data = NULL;
+
+ ret = dict_get_str_sizen(dict, GF_AFR_REPLACE_BRICK, &empty_brick);
+ if (!ret)
+ op_type = GF_AFR_REPLACE_BRICK;
+
+ ab_ret = dict_get_str_sizen(dict, GF_AFR_ADD_BRICK, &empty_brick);
+ if (!ab_ret)
+ op_type = GF_AFR_ADD_BRICK;
+
+ if (ret && ab_ret)
+ goto out;
+
+ if (frame->root->pid != GF_CLIENT_PID_ADD_REPLICA_MOUNT) {
+ gf_smsg(this->name, GF_LOG_ERROR, EPERM, AFR_MSG_INTERNAL_ATTR,
+ "op_type=%s", op_type, NULL);
+ ret = 1;
+ goto out;
+ }
+ empty_index = afr_get_child_index_from_name(this, empty_brick);
+
+ if (empty_index < 0) {
+ /* Didn't belong to this replica pair
+ * Just do a no-op
+ */
+ AFR_STACK_UNWIND(setxattr, frame, 0, 0, NULL);
+ return 0;
+ } else {
+ data = GF_CALLOC(1, sizeof(*data), gf_afr_mt_empty_brick_t);
+ if (!data) {
+ ret = 1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+ data->frame = frame;
+ loc_copy(&data->loc, loc);
+ data->empty_index = empty_index;
+ data->op_type = op_type;
+ ret = synctask_new(this->ctx->env, _afr_handle_empty_brick,
+ _afr_handle_empty_brick_cbk, NULL, data);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN_STATUS,
+ NULL);
+ ret = 1;
+ op_errno = ENOMEM;
+ afr_brick_args_cleanup(data);
+ goto out;
+ }
+ }
+ ret = 0;
+out:
+ if (ret == 1) {
+ AFR_STACK_UNWIND(setxattr, frame, -1, op_errno, NULL);
+ ret = 0;
+ }
+ return ret;
+}
- spb_child_str = alloca0 (len + 1);
- memcpy (spb_child_str, value, len);
+static int
+afr_handle_special_xattr(xlator_t *this, call_frame_t *frame, loc_t *loc,
+ dict_t *dict)
+{
+ int ret = -1;
- if (!strcmp (spb_child_str, "none"))
- return -2;
+ ret = afr_handle_split_brain_commands(this, frame, loc, dict);
+ if (ret == 0)
+ goto out;
- spb_child_index = afr_get_child_index_from_name (this,
- spb_child_str);
- if (spb_child_index < 0) {
- gf_log (this->name, GF_LOG_ERROR, "Invalid subvol: %s",
- spb_child_str);
- }
- return spb_child_index;
-}
-
-int
-afr_handle_split_brain_commands (xlator_t *this, call_frame_t *frame,
- loc_t *loc, dict_t *dict)
-{
- int len = 0;
- void *value = NULL;
- int spb_child_index = -1;
- int ret = -1;
- int op_errno = EINVAL;
- afr_private_t *priv = NULL;
-
- priv = this->private;
-
- ret = dict_get_ptr_and_len (dict, GF_AFR_SBRAIN_CHOICE, &value,
- &len);
- if (value) {
- spb_child_index = afr_get_split_brain_child_index (this, value,
- len);
- if (spb_child_index < 0) {
- /* Case where value was "none" */
- if (spb_child_index == -2)
- spb_child_index = -1;
- else {
- ret = 1;
- goto out;
- }
- }
-
- afr_set_split_brain_choice (frame, this, loc,
- spb_child_index);
- ret = 0;
- goto out;
- }
+ ret = afr_handle_spb_choice_timeout(this, frame, dict);
+ if (ret == 0)
+ goto out;
- ret = dict_get_ptr_and_len (dict, GF_AFR_SBRAIN_RESOLVE, &value, &len);
- if (value) {
- spb_child_index = afr_get_split_brain_child_index (this, value,
- len);
- if (spb_child_index < 0) {
- ret = 1;
- goto out;
- }
-
- afr_split_brain_resolve_do (frame, this, loc,
- priv->children[spb_child_index]->name);
- ret = 0;
- }
+ /* Applicable for replace-brick and add-brick commands */
+ ret = afr_handle_empty_brick(this, frame, loc, dict);
out:
- /* key was correct but value was invalid when ret == 1 */
- if (ret == 1) {
- AFR_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
- ret = 0;
- }
- return ret;
+ return ret;
}
int
-afr_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
- int32_t flags, dict_t *xdata)
+afr_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata)
{
- afr_local_t *local = NULL;
- call_frame_t *transaction_frame = NULL;
- int ret = -1;
- int op_errno = EINVAL;
-
- GF_IF_INTERNAL_XATTR_GOTO ("trusted.afr.*", dict,
- op_errno, out);
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = EINVAL;
- GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.afr.*", dict,
- op_errno, out);
+ GF_IF_INTERNAL_XATTR_GOTO("trusted.afr.*", dict, op_errno, out);
- ret = afr_handle_split_brain_commands (this, frame, loc, dict);
+ GF_IF_INTERNAL_XATTR_GOTO("trusted.glusterfs.afr.*", dict, op_errno, out);
- if (ret == 0)
- return 0;
+ ret = afr_handle_special_xattr(this, frame, loc, dict);
+ if (ret == 0)
+ return 0;
- transaction_frame = copy_frame (frame);
- if (!transaction_frame)
- goto out;
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
- local = AFR_FRAME_INIT (transaction_frame, op_errno);
- if (!local)
- goto out;
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
- local->cont.setxattr.dict = dict_ref (dict);
- local->cont.setxattr.flags = flags;
- if (xdata)
- local->xdata_req = dict_copy_with_ref (xdata, NULL);
- else
- local->xdata_req = dict_new ();
+ local->cont.setxattr.dict = dict_ref(dict);
+ local->cont.setxattr.flags = flags;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
- if (!local->xdata_req)
- goto out;
+ if (!local->xdata_req)
+ goto out;
- local->transaction.wind = afr_setxattr_wind;
- local->transaction.fop = __afr_txn_write_fop;
- local->transaction.done = __afr_txn_write_done;
- local->transaction.unwind = afr_setxattr_unwind;
+ local->transaction.wind = afr_setxattr_wind;
+ local->transaction.unwind = afr_setxattr_unwind;
- loc_copy (&local->loc, loc);
- local->inode = inode_ref (loc->inode);
+ loc_copy(&local->loc, loc);
+ ret = afr_set_inode_local(this, local, loc->inode);
+ if (ret)
+ goto out;
- local->transaction.main_frame = frame;
- local->transaction.start = LLONG_MAX - 1;
- local->transaction.len = 0;
+ local->transaction.main_frame = frame;
+ local->transaction.start = LLONG_MAX - 1;
+ local->transaction.len = 0;
- local->op = GF_FOP_SETXATTR;
+ local->op = GF_FOP_SETXATTR;
- ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ ret = afr_transaction(transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- return 0;
+ return 0;
out:
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- AFR_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
+ AFR_STACK_UNWIND(setxattr, frame, -1, op_errno, NULL);
- return 0;
+ return 0;
}
/* {{{ fsetxattr */
-
int
-afr_fsetxattr_unwind (call_frame_t *frame, xlator_t *this)
+afr_fsetxattr_unwind(call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- call_frame_t *main_frame = NULL;
-
- local = frame->local;
+ afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
- main_frame = afr_transaction_detach_fop_frame (frame);
- if (!main_frame)
- return 0;
+ local = frame->local;
- AFR_STACK_UNWIND (fsetxattr, main_frame, local->op_ret, local->op_errno,
- local->xdata_rsp);
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
-}
+ AFR_STACK_UNWIND(fsetxattr, main_frame, local->op_ret, local->op_errno,
+ local->xdata_rsp);
+ return 0;
+}
int
-afr_fsetxattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+afr_fsetxattr_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno,
- NULL, NULL, xdata);
+ return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, NULL,
+ NULL, NULL, xdata);
}
-
int
-afr_fsetxattr_wind (call_frame_t *frame, xlator_t *this, int subvol)
+afr_fsetxattr_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- local = frame->local;
- priv = this->private;
-
- STACK_WIND_COOKIE (frame, afr_fsetxattr_wind_cbk, (void *) (long) subvol,
- priv->children[subvol],
- priv->children[subvol]->fops->fsetxattr,
- local->fd, local->cont.fsetxattr.dict,
- local->cont.fsetxattr.flags, local->xdata_req);
- return 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ STACK_WIND_COOKIE(frame, afr_fsetxattr_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->fsetxattr, local->fd,
+ local->cont.fsetxattr.dict, local->cont.fsetxattr.flags,
+ local->xdata_req);
+ return 0;
}
-
int
-afr_fsetxattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, dict_t *dict, int32_t flags, dict_t *xdata)
+afr_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata)
{
- afr_local_t *local = NULL;
- call_frame_t *transaction_frame = NULL;
- int ret = -1;
- int op_errno = ENOMEM;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
- GF_IF_INTERNAL_XATTR_GOTO ("trusted.afr.*", dict,
- op_errno, out);
+ GF_IF_INTERNAL_XATTR_GOTO("trusted.afr.*", dict, op_errno, out);
- GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.afr.*", dict,
- op_errno, out);
+ GF_IF_INTERNAL_XATTR_GOTO("trusted.glusterfs.afr.*", dict, op_errno, out);
- transaction_frame = copy_frame (frame);
- if (!transaction_frame)
- goto out;
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
- local = AFR_FRAME_INIT (transaction_frame, op_errno);
- if (!local)
- goto out;
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
- local->cont.fsetxattr.dict = dict_ref (dict);
- local->cont.fsetxattr.flags = flags;
+ local->cont.fsetxattr.dict = dict_ref(dict);
+ local->cont.fsetxattr.flags = flags;
- if (xdata)
- local->xdata_req = dict_copy_with_ref (xdata, NULL);
- else
- local->xdata_req = dict_new ();
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
- if (!local->xdata_req)
- goto out;
+ if (!local->xdata_req)
+ goto out;
- local->transaction.wind = afr_fsetxattr_wind;
- local->transaction.fop = __afr_txn_write_fop;
- local->transaction.done = __afr_txn_write_done;
- local->transaction.unwind = afr_fsetxattr_unwind;
+ local->transaction.wind = afr_fsetxattr_wind;
+ local->transaction.unwind = afr_fsetxattr_unwind;
- local->fd = fd_ref (fd);
- local->inode = inode_ref (fd->inode);
+ local->fd = fd_ref(fd);
+ ret = afr_set_inode_local(this, local, fd->inode);
+ if (ret)
+ goto out;
- local->op = GF_FOP_FSETXATTR;
+ local->op = GF_FOP_FSETXATTR;
- local->transaction.main_frame = frame;
- local->transaction.start = LLONG_MAX - 1;
- local->transaction.len = 0;
+ local->transaction.main_frame = frame;
+ local->transaction.start = LLONG_MAX - 1;
+ local->transaction.len = 0;
- ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ ret = afr_transaction(transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- return 0;
+ return 0;
out:
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- AFR_STACK_UNWIND (fsetxattr, frame, -1, op_errno, NULL);
- return 0;
+ AFR_STACK_UNWIND(fsetxattr, frame, -1, op_errno, NULL);
+ return 0;
}
/* }}} */
-
/* {{{ removexattr */
-
int
-afr_removexattr_unwind (call_frame_t *frame, xlator_t *this)
+afr_removexattr_unwind(call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- call_frame_t *main_frame = NULL;
-
- local = frame->local;
+ afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
- main_frame = afr_transaction_detach_fop_frame (frame);
- if (!main_frame)
- return 0;
+ local = frame->local;
- AFR_STACK_UNWIND (removexattr, main_frame, local->op_ret, local->op_errno,
- local->xdata_rsp);
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
-}
+ AFR_STACK_UNWIND(removexattr, main_frame, local->op_ret, local->op_errno,
+ local->xdata_rsp);
+ return 0;
+}
int
-afr_removexattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+afr_removexattr_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno,
- NULL, NULL, xdata);
+ return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, NULL,
+ NULL, NULL, xdata);
}
-
int
-afr_removexattr_wind (call_frame_t *frame, xlator_t *this, int subvol)
+afr_removexattr_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- local = frame->local;
- priv = this->private;
+ local = frame->local;
+ priv = this->private;
- STACK_WIND_COOKIE (frame, afr_removexattr_wind_cbk, (void *) (long) subvol,
- priv->children[subvol],
- priv->children[subvol]->fops->removexattr,
- &local->loc, local->cont.removexattr.name,
- local->xdata_req);
- return 0;
+ STACK_WIND_COOKIE(frame, afr_removexattr_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->removexattr, &local->loc,
+ local->cont.removexattr.name, local->xdata_req);
+ return 0;
}
-
int
-afr_removexattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name, dict_t *xdata)
+afr_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
{
- afr_local_t *local = NULL;
- call_frame_t *transaction_frame = NULL;
- int ret = -1;
- int op_errno = ENOMEM;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
- GF_IF_NATIVE_XATTR_GOTO ("trusted.afr.*",
- name, op_errno, out);
+ GF_IF_NATIVE_XATTR_GOTO("trusted.afr.*", name, op_errno, out);
- GF_IF_NATIVE_XATTR_GOTO ("trusted.glusterfs.afr.*",
- name, op_errno, out);
+ GF_IF_NATIVE_XATTR_GOTO("trusted.glusterfs.afr.*", name, op_errno, out);
- transaction_frame = copy_frame (frame);
- if (!transaction_frame)
- goto out;
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
- local = AFR_FRAME_INIT (transaction_frame, op_errno);
- if (!local)
- goto out;
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
- local->cont.removexattr.name = gf_strdup (name);
+ local->cont.removexattr.name = gf_strdup(name);
- if (xdata)
- local->xdata_req = dict_copy_with_ref (xdata, NULL);
- else
- local->xdata_req = dict_new ();
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
- if (!local->xdata_req)
- goto out;
+ if (!local->xdata_req)
+ goto out;
- local->transaction.wind = afr_removexattr_wind;
- local->transaction.fop = __afr_txn_write_fop;
- local->transaction.done = __afr_txn_write_done;
- local->transaction.unwind = afr_removexattr_unwind;
+ local->transaction.wind = afr_removexattr_wind;
+ local->transaction.unwind = afr_removexattr_unwind;
- loc_copy (&local->loc, loc);
- local->inode = inode_ref (loc->inode);
+ loc_copy(&local->loc, loc);
+ ret = afr_set_inode_local(this, local, loc->inode);
+ if (ret)
+ goto out;
- local->op = GF_FOP_REMOVEXATTR;
+ local->op = GF_FOP_REMOVEXATTR;
- local->transaction.main_frame = frame;
- local->transaction.start = LLONG_MAX - 1;
- local->transaction.len = 0;
+ local->transaction.main_frame = frame;
+ local->transaction.start = LLONG_MAX - 1;
+ local->transaction.len = 0;
- ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ ret = afr_transaction(transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- return 0;
+ return 0;
out:
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- AFR_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL);
- return 0;
+ AFR_STACK_UNWIND(removexattr, frame, -1, op_errno, NULL);
+ return 0;
}
/* ffremovexattr */
int
-afr_fremovexattr_unwind (call_frame_t *frame, xlator_t *this)
+afr_fremovexattr_unwind(call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
- local = frame->local;
+ local = frame->local;
- main_frame = afr_transaction_detach_fop_frame (frame);
- if (!main_frame)
- return 0;
-
- AFR_STACK_UNWIND (fremovexattr, main_frame, local->op_ret, local->op_errno,
- local->xdata_rsp);
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
-}
+ AFR_STACK_UNWIND(fremovexattr, main_frame, local->op_ret, local->op_errno,
+ local->xdata_rsp);
+ return 0;
+}
int
-afr_fremovexattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+afr_fremovexattr_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno,
- NULL, NULL, xdata);
+ return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, NULL,
+ NULL, NULL, xdata);
}
-
int
-afr_fremovexattr_wind (call_frame_t *frame, xlator_t *this, int subvol)
+afr_fremovexattr_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- local = frame->local;
- priv = this->private;
+ local = frame->local;
+ priv = this->private;
- STACK_WIND_COOKIE (frame, afr_fremovexattr_wind_cbk, (void *) (long) subvol,
- priv->children[subvol],
- priv->children[subvol]->fops->fremovexattr,
- local->fd, local->cont.removexattr.name,
- local->xdata_req);
- return 0;
+ STACK_WIND_COOKIE(frame, afr_fremovexattr_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->fremovexattr, local->fd,
+ local->cont.removexattr.name, local->xdata_req);
+ return 0;
}
-
int
-afr_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- const char *name, dict_t *xdata)
+afr_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
{
- afr_local_t *local = NULL;
- call_frame_t *transaction_frame = NULL;
- int ret = -1;
- int op_errno = ENOMEM;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
- GF_IF_NATIVE_XATTR_GOTO ("trusted.afr.*",
- name, op_errno, out);
+ GF_IF_NATIVE_XATTR_GOTO("trusted.afr.*", name, op_errno, out);
- GF_IF_NATIVE_XATTR_GOTO ("trusted.glusterfs.afr.*",
- name, op_errno, out);
+ GF_IF_NATIVE_XATTR_GOTO("trusted.glusterfs.afr.*", name, op_errno, out);
- transaction_frame = copy_frame (frame);
- if (!transaction_frame)
- goto out;
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
- local = AFR_FRAME_INIT (transaction_frame, op_errno);
- if (!local)
- goto out;
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
- local->cont.removexattr.name = gf_strdup (name);
- if (xdata)
- local->xdata_req = dict_copy_with_ref (xdata, NULL);
- else
- local->xdata_req = dict_new ();
+ local->cont.removexattr.name = gf_strdup(name);
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
- if (!local->xdata_req)
- goto out;
+ if (!local->xdata_req)
+ goto out;
- local->transaction.wind = afr_fremovexattr_wind;
- local->transaction.fop = __afr_txn_write_fop;
- local->transaction.done = __afr_txn_write_done;
- local->transaction.unwind = afr_fremovexattr_unwind;
+ local->transaction.wind = afr_fremovexattr_wind;
+ local->transaction.unwind = afr_fremovexattr_unwind;
- local->fd = fd_ref (fd);
- local->inode = inode_ref (fd->inode);
+ local->fd = fd_ref(fd);
+ ret = afr_set_inode_local(this, local, fd->inode);
+ if (ret)
+ goto out;
- local->op = GF_FOP_FREMOVEXATTR;
+ local->op = GF_FOP_FREMOVEXATTR;
- local->transaction.main_frame = frame;
- local->transaction.start = LLONG_MAX - 1;
- local->transaction.len = 0;
+ local->transaction.main_frame = frame;
+ local->transaction.start = LLONG_MAX - 1;
+ local->transaction.len = 0;
- ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ ret = afr_transaction(transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- return 0;
+ return 0;
out:
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- AFR_STACK_UNWIND (fremovexattr, frame, -1, op_errno, NULL);
+ AFR_STACK_UNWIND(fremovexattr, frame, -1, op_errno, NULL);
- return 0;
+ return 0;
}
-
int
-afr_fallocate_unwind (call_frame_t *frame, xlator_t *this)
+afr_fallocate_unwind(call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
- local = frame->local;
+ local = frame->local;
- main_frame = afr_transaction_detach_fop_frame (frame);
- if (!main_frame)
- return 0;
-
- AFR_STACK_UNWIND (fallocate, main_frame, local->op_ret, local->op_errno,
- &local->cont.inode_wfop.prebuf,
- &local->cont.inode_wfop.postbuf, local->xdata_rsp);
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
-}
+ AFR_STACK_UNWIND(fallocate, main_frame, local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf, local->xdata_rsp);
+ return 0;
+}
int
-afr_fallocate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+afr_fallocate_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno,
- prebuf, postbuf, xdata);
+ return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, prebuf,
+ postbuf, NULL, xdata);
}
-
int
-afr_fallocate_wind (call_frame_t *frame, xlator_t *this, int subvol)
+afr_fallocate_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- local = frame->local;
- priv = this->private;
-
- STACK_WIND_COOKIE (frame, afr_fallocate_wind_cbk, (void *) (long) subvol,
- priv->children[subvol],
- priv->children[subvol]->fops->fallocate,
- local->fd, local->cont.fallocate.mode,
- local->cont.fallocate.offset,
- local->cont.fallocate.len, local->xdata_req);
- return 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ STACK_WIND_COOKIE(frame, afr_fallocate_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->fallocate, local->fd,
+ local->cont.fallocate.mode, local->cont.fallocate.offset,
+ local->cont.fallocate.len, local->xdata_req);
+ return 0;
}
-
int
-afr_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
- off_t offset, size_t len, dict_t *xdata)
+afr_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata)
{
- call_frame_t *transaction_frame = NULL;
- afr_local_t *local = NULL;
- int ret = -1;
- int op_errno = ENOMEM;
+ call_frame_t *transaction_frame = NULL;
+ afr_local_t *local = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
- transaction_frame = copy_frame (frame);
- if (!transaction_frame)
- goto out;
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
- local = AFR_FRAME_INIT (transaction_frame, op_errno);
- if (!local)
- goto out;
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
- local->cont.fallocate.mode = mode;
- local->cont.fallocate.offset = offset;
- local->cont.fallocate.len = len;
+ local->cont.fallocate.mode = mode;
+ local->cont.fallocate.offset = offset;
+ local->cont.fallocate.len = len;
- local->fd = fd_ref (fd);
- local->inode = inode_ref (fd->inode);
+ local->fd = fd_ref(fd);
+ ret = afr_set_inode_local(this, local, fd->inode);
+ if (ret)
+ goto out;
- if (xdata)
- local->xdata_req = dict_copy_with_ref (xdata, NULL);
- else
- local->xdata_req = dict_new ();
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
- if (!local->xdata_req)
- goto out;
+ if (!local->xdata_req)
+ goto out;
- local->op = GF_FOP_FALLOCATE;
+ local->op = GF_FOP_FALLOCATE;
- local->transaction.wind = afr_fallocate_wind;
- local->transaction.fop = __afr_txn_write_fop;
- local->transaction.done = __afr_txn_write_done;
- local->transaction.unwind = afr_fallocate_unwind;
+ local->transaction.wind = afr_fallocate_wind;
+ local->transaction.unwind = afr_fallocate_unwind;
- local->transaction.main_frame = frame;
+ local->transaction.main_frame = frame;
- local->transaction.start = local->cont.fallocate.offset;
- local->transaction.len = 0;
+ local->transaction.start = local->cont.fallocate.offset;
+ local->transaction.len = 0;
- afr_fix_open (fd, this);
+ afr_fix_open(fd, this);
- ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ ret = afr_transaction(transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- return 0;
+ return 0;
out:
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- AFR_STACK_UNWIND (fallocate, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
+ AFR_STACK_UNWIND(fallocate, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
}
-
/* }}} */
/* {{{ discard */
int
-afr_discard_unwind (call_frame_t *frame, xlator_t *this)
+afr_discard_unwind(call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
- local = frame->local;
+ local = frame->local;
- main_frame = afr_transaction_detach_fop_frame (frame);
- if (!main_frame)
- return 0;
-
- AFR_STACK_UNWIND (discard, main_frame, local->op_ret, local->op_errno,
- &local->cont.inode_wfop.prebuf,
- &local->cont.inode_wfop.postbuf, local->xdata_rsp);
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
+
+ AFR_STACK_UNWIND(discard, main_frame, local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf, local->xdata_rsp);
+ return 0;
}
+int
+afr_discard_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, prebuf,
+ postbuf, NULL, xdata);
+}
int
-afr_discard_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+afr_discard_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno,
- prebuf, postbuf, xdata);
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ STACK_WIND_COOKIE(frame, afr_discard_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->discard, local->fd,
+ local->cont.discard.offset, local->cont.discard.len,
+ local->xdata_req);
+ return 0;
}
+int
+afr_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
+
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
+
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
+
+ local->cont.discard.offset = offset;
+ local->cont.discard.len = len;
+
+ local->fd = fd_ref(fd);
+ ret = afr_set_inode_local(this, local, fd->inode);
+ if (ret)
+ goto out;
+
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
+
+ if (!local->xdata_req)
+ goto out;
+
+ local->op = GF_FOP_DISCARD;
+
+ local->transaction.wind = afr_discard_wind;
+ local->transaction.unwind = afr_discard_unwind;
+
+ local->transaction.main_frame = frame;
+
+ local->transaction.start = local->cont.discard.offset;
+ local->transaction.len = 0;
+
+ afr_fix_open(fd, this);
+
+ ret = afr_transaction(transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ return 0;
+out:
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
+
+ AFR_STACK_UNWIND(discard, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+
+/* {{{ zerofill */
int
-afr_discard_wind (call_frame_t *frame, xlator_t *this, int subvol)
+afr_zerofill_unwind(call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
- local = frame->local;
- priv = this->private;
+ local = frame->local;
- STACK_WIND_COOKIE (frame, afr_discard_wind_cbk, (void *) (long) subvol,
- priv->children[subvol],
- priv->children[subvol]->fops->discard,
- local->fd, local->cont.discard.offset,
- local->cont.discard.len, local->xdata_req);
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
+
+ AFR_STACK_UNWIND(discard, main_frame, local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf, local->xdata_rsp);
+ return 0;
}
+int
+afr_zerofill_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, prebuf,
+ postbuf, NULL, xdata);
+}
+
+int
+afr_zerofill_wind(call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ STACK_WIND_COOKIE(frame, afr_zerofill_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->zerofill, local->fd,
+ local->cont.zerofill.offset, local->cont.zerofill.len,
+ local->xdata_req);
+ return 0;
+}
int
-afr_discard (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+afr_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
size_t len, dict_t *xdata)
{
- afr_local_t *local = NULL;
- call_frame_t *transaction_frame = NULL;
- int ret = -1;
- int op_errno = ENOMEM;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
- transaction_frame = copy_frame (frame);
- if (!transaction_frame)
- goto out;
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
- local = AFR_FRAME_INIT (transaction_frame, op_errno);
- if (!local)
- goto out;
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
- local->cont.discard.offset = offset;
- local->cont.discard.len = len;
+ local->cont.zerofill.offset = offset;
+ local->cont.zerofill.len = len;
- local->fd = fd_ref (fd);
- local->inode = inode_ref (fd->inode);
+ local->fd = fd_ref(fd);
+ ret = afr_set_inode_local(this, local, fd->inode);
+ if (ret)
+ goto out;
- if (xdata)
- local->xdata_req = dict_copy_with_ref (xdata, NULL);
- else
- local->xdata_req = dict_new ();
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
- if (!local->xdata_req)
- goto out;
+ if (!local->xdata_req)
+ goto out;
- local->op = GF_FOP_DISCARD;
+ local->op = GF_FOP_ZEROFILL;
- local->transaction.wind = afr_discard_wind;
- local->transaction.fop = __afr_txn_write_fop;
- local->transaction.done = __afr_txn_write_done;
- local->transaction.unwind = afr_discard_unwind;
+ local->transaction.wind = afr_zerofill_wind;
+ local->transaction.unwind = afr_zerofill_unwind;
- local->transaction.main_frame = frame;
+ local->transaction.main_frame = frame;
- local->transaction.start = local->cont.discard.offset;
- local->transaction.len = 0;
+ local->transaction.start = local->cont.zerofill.offset;
+ local->transaction.len = len;
- afr_fix_open (fd, this);
+ afr_fix_open(fd, this);
- ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ ret = afr_transaction(transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- return 0;
+ return 0;
out:
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- AFR_STACK_UNWIND (discard, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
+ AFR_STACK_UNWIND(zerofill, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
}
+/* }}} */
-/* {{{ zerofill */
+int32_t
+afr_xattrop_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xattr,
+ dict_t *xdata)
+{
+ return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, NULL,
+ NULL, xattr, xdata);
+}
int
-afr_zerofill_unwind (call_frame_t *frame, xlator_t *this)
+afr_xattrop_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t * local = NULL;
- call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ STACK_WIND_COOKIE(frame, afr_xattrop_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->xattrop, &local->loc,
+ local->cont.xattrop.optype, local->cont.xattrop.xattr,
+ local->xdata_req);
+ return 0;
+}
- local = frame->local;
+int
+afr_xattrop_unwind(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
- main_frame = afr_transaction_detach_fop_frame (frame);
- if (!main_frame)
- return 0;
+ local = frame->local;
- AFR_STACK_UNWIND (discard, main_frame, local->op_ret, local->op_errno,
- &local->cont.inode_wfop.prebuf,
- &local->cont.inode_wfop.postbuf, local->xdata_rsp);
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
+
+ AFR_STACK_UNWIND(xattrop, main_frame, local->op_ret, local->op_errno,
+ local->xattr_rsp, local->xdata_rsp);
+ return 0;
+}
+
+int32_t
+afr_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
+
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
+
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
+
+ local->cont.xattrop.xattr = dict_ref(xattr);
+ local->cont.xattrop.optype = optype;
+ if (xdata)
+ local->xdata_req = dict_ref(xdata);
+
+ local->transaction.wind = afr_xattrop_wind;
+ local->transaction.unwind = afr_xattrop_unwind;
+
+ loc_copy(&local->loc, loc);
+ ret = afr_set_inode_local(this, local, loc->inode);
+ if (ret)
+ goto out;
+
+ local->op = GF_FOP_XATTROP;
+
+ local->transaction.main_frame = frame;
+ local->transaction.start = LLONG_MAX - 1;
+ local->transaction.len = 0;
+
+ ret = afr_transaction(transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ return 0;
+out:
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
+
+ AFR_STACK_UNWIND(xattrop, frame, -1, op_errno, NULL, NULL);
+ return 0;
}
+int32_t
+afr_fxattrop_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xattr,
+ dict_t *xdata)
+{
+ return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, NULL,
+ NULL, xattr, xdata);
+}
int
-afr_zerofill_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+afr_fxattrop_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno,
- prebuf, postbuf, xdata);
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ STACK_WIND_COOKIE(frame, afr_fxattrop_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->fxattrop, local->fd,
+ local->cont.xattrop.optype, local->cont.xattrop.xattr,
+ local->xdata_req);
+ return 0;
}
-
int
-afr_zerofill_wind (call_frame_t *frame, xlator_t *this, int subvol)
+afr_fxattrop_unwind(call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
- local = frame->local;
- priv = this->private;
+ local = frame->local;
- STACK_WIND_COOKIE (frame, afr_zerofill_wind_cbk, (void *) (long) subvol,
- priv->children[subvol],
- priv->children[subvol]->fops->zerofill,
- local->fd, local->cont.zerofill.offset,
- local->cont.zerofill.len, local->xdata_req);
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
+
+ AFR_STACK_UNWIND(fxattrop, main_frame, local->op_ret, local->op_errno,
+ local->xattr_rsp, local->xdata_rsp);
+ return 0;
+}
+
+int32_t
+afr_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
+
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
+
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
+
+ local->cont.xattrop.xattr = dict_ref(xattr);
+ local->cont.xattrop.optype = optype;
+ if (xdata)
+ local->xdata_req = dict_ref(xdata);
+
+ local->transaction.wind = afr_fxattrop_wind;
+ local->transaction.unwind = afr_fxattrop_unwind;
+
+ local->fd = fd_ref(fd);
+ ret = afr_set_inode_local(this, local, fd->inode);
+ if (ret)
+ goto out;
+
+ local->op = GF_FOP_FXATTROP;
+
+ local->transaction.main_frame = frame;
+ local->transaction.start = LLONG_MAX - 1;
+ local->transaction.len = 0;
+
+ ret = afr_transaction(transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ return 0;
+out:
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
+
+ AFR_STACK_UNWIND(fxattrop, frame, -1, op_errno, NULL, NULL);
+ return 0;
}
int
-afr_zerofill (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
- size_t len, dict_t *xdata)
+afr_fsync_unwind(call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- call_frame_t *transaction_frame = NULL;
- int ret = -1;
- int op_errno = ENOMEM;
+ afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
+
+ local = frame->local;
+
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
+ return 0;
+
+ AFR_STACK_UNWIND(fsync, main_frame, local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf, local->xdata_rsp);
- transaction_frame = copy_frame (frame);
- if (!transaction_frame)
- goto out;
+ return 0;
+}
+
+int
+afr_fsync_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, prebuf,
+ postbuf, NULL, xdata);
+}
- local = AFR_FRAME_INIT (transaction_frame, op_errno);
- if (!local)
- goto out;
+int
+afr_fsync_wind(call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- local->cont.zerofill.offset = offset;
- local->cont.zerofill.len = len;
+ local = frame->local;
+ priv = this->private;
- local->fd = fd_ref (fd);
- local->inode = inode_ref (fd->inode);
+ STACK_WIND_COOKIE(frame, afr_fsync_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->fsync, local->fd,
+ local->cont.fsync.datasync, local->xdata_req);
+ return 0;
+}
- if (xdata)
- local->xdata_req = dict_copy_with_ref (xdata, NULL);
- else
- local->xdata_req = dict_new ();
+int
+afr_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+ dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int32_t op_errno = ENOMEM;
+ int8_t last_fsync = 0;
+
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
+
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
+
+ if (xdata) {
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ if (dict_get_int8(xdata, "last-fsync", &last_fsync) == 0) {
+ if (last_fsync) {
+ local->transaction.disable_delayed_post_op = _gf_true;
+ }
+ }
+ } else {
+ local->xdata_req = dict_new();
+ }
- if (!local->xdata_req)
- goto out;
+ if (!local->xdata_req)
+ goto out;
- local->op = GF_FOP_ZEROFILL;
+ local->fd = fd_ref(fd);
+ ret = afr_set_inode_local(this, local, fd->inode);
+ if (ret)
+ goto out;
- local->transaction.wind = afr_zerofill_wind;
- local->transaction.fop = __afr_txn_write_fop;
- local->transaction.done = __afr_txn_write_done;
- local->transaction.unwind = afr_zerofill_unwind;
+ local->op = GF_FOP_FSYNC;
+ local->cont.fsync.datasync = datasync;
- local->transaction.main_frame = frame;
+ if (afr_fd_has_witnessed_unstable_write(this, fd->inode)) {
+ /* don't care. we only wanted to CLEAR the bit */
+ }
- local->transaction.start = local->cont.discard.offset;
- local->transaction.len = len;
+ local->transaction.wind = afr_fsync_wind;
+ local->transaction.unwind = afr_fsync_unwind;
- afr_fix_open (fd, this);
+ local->transaction.main_frame = frame;
- ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ ret = afr_transaction(transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- return 0;
+ return 0;
out:
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- AFR_STACK_UNWIND (zerofill, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
-}
+ AFR_STACK_UNWIND(fsync, frame, -1, op_errno, NULL, NULL, NULL);
-/* }}} */
+ return 0;
+}
diff --git a/xlators/cluster/afr/src/afr-inode-write.h b/xlators/cluster/afr/src/afr-inode-write.h
index 7b1fc552880..a787069b7a1 100644
--- a/xlators/cluster/afr/src/afr-inode-write.h
+++ b/xlators/cluster/afr/src/afr-inode-write.h
@@ -12,71 +12,83 @@
#define __INODE_WRITE_H__
int32_t
-afr_chmod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dict_t *xdata);
+afr_chmod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dict_t *xdata);
int32_t
-afr_chown (call_frame_t *frame, xlator_t *this,
- loc_t *loc, uid_t uid, gid_t gid, dict_t *xdata);
+afr_chown(call_frame_t *frame, xlator_t *this, loc_t *loc, uid_t uid, gid_t gid,
+ dict_t *xdata);
int
-afr_fchown (call_frame_t *frame, xlator_t *this,
- fd_t *fd, uid_t uid, gid_t gid, dict_t *xdata);
+afr_fchown(call_frame_t *frame, xlator_t *this, fd_t *fd, uid_t uid, gid_t gid,
+ dict_t *xdata);
int32_t
-afr_fchmod (call_frame_t *frame, xlator_t *this,
- fd_t *fd, mode_t mode, dict_t *xdata);
+afr_fchmod(call_frame_t *frame, xlator_t *this, fd_t *fd, mode_t mode,
+ dict_t *xdata);
int32_t
-afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iovec *vector, int32_t count, off_t offset,
- uint32_t flags, struct iobref *iobref, dict_t *xdata);
+afr_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
+ int32_t count, off_t offset, uint32_t flags, struct iobref *iobref,
+ dict_t *xdata);
int32_t
-afr_truncate (call_frame_t *frame, xlator_t *this,
- loc_t *loc, off_t offset, dict_t *xdata);
+afr_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata);
int32_t
-afr_ftruncate (call_frame_t *frame, xlator_t *this,
- fd_t *fd, off_t offset, dict_t *xdata);
+afr_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata);
int32_t
-afr_utimens (call_frame_t *frame, xlator_t *this,
- loc_t *loc, struct timespec tv[2], dict_t *xdata);
+afr_utimens(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct timespec tv[2], dict_t *xdata);
int
-afr_setattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, struct iatt *buf, int32_t valid, dict_t *xdata);
+afr_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *buf,
+ int32_t valid, dict_t *xdata);
int
-afr_fsetattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, struct iatt *buf, int32_t valid, dict_t *xdata);
+afr_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *buf,
+ int32_t valid, dict_t *xdata);
int32_t
-afr_setxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *dict, int32_t flags, dict_t *xdata);
+afr_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata);
int32_t
-afr_fsetxattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, dict_t *dict, int32_t flags, dict_t *xdata);
+afr_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata);
int32_t
-afr_removexattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name, dict_t *xdata);
+afr_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata);
int32_t
-afr_fremovexattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, const char *name, dict_t *xdata);
+afr_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata);
int
-afr_discard (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
- size_t len, dict_t *xdata);
+afr_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata);
int
-afr_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
- off_t offset, size_t len, dict_t *xdata);
+afr_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata);
int
afr_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
off_t len, dict_t *xdata);
+
+int32_t
+afr_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata);
+
+int32_t
+afr_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata);
+
+int
+afr_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+ dict_t *xdata);
#endif /* __INODE_WRITE_H__ */
diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c
index dc4dfbc42c0..bc8eabe0f43 100644
--- a/xlators/cluster/afr/src/afr-lk-common.c
+++ b/xlators/cluster/afr/src/afr-lk-common.c
@@ -8,1664 +8,784 @@
cases as published by the Free Software Foundation.
*/
-#include "dict.h"
-#include "byte-order.h"
-#include "common-utils.h"
+#include <glusterfs/dict.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/common-utils.h>
#include "afr.h"
#include "afr-transaction.h"
+#include "afr-messages.h"
#include <signal.h>
-
-#define LOCKED_NO 0x0 /* no lock held */
-#define LOCKED_YES 0x1 /* for DATA, METADATA, ENTRY and higher_path */
-#define LOCKED_LOWER 0x2 /* for lower path */
-
-#define AFR_TRACE_INODELK_IN(frame, this, params ...) \
- do { \
- afr_private_t *_priv = this->private; \
- if (!_priv->inodelk_trace) \
- break; \
- afr_trace_inodelk_in (frame, this, params); \
- } while (0);
-
-#define AFR_TRACE_INODELK_OUT(frame, this, params ...) \
- do { \
- afr_private_t *_priv = this->private; \
- if (!_priv->inodelk_trace) \
- break; \
- afr_trace_inodelk_out (frame, this, params); \
- } while (0);
-
-#define AFR_TRACE_ENTRYLK_IN(frame, this, params ...) \
- do { \
- afr_private_t *_priv = this->private; \
- if (!_priv->entrylk_trace) \
- break; \
- afr_trace_entrylk_in (frame, this, params); \
- } while (0);
-
-#define AFR_TRACE_ENTRYLK_OUT(frame, this, params ...) \
- do { \
- afr_private_t *_priv = this->private; \
- if (!_priv->entrylk_trace) \
- break; \
- afr_trace_entrylk_out (frame, this, params); \
- } while (0);
-
-int
-afr_entry_lockee_cmp (const void *l1, const void *l2)
-{
- const afr_entry_lockee_t *r1 = l1;
- const afr_entry_lockee_t *r2 = l2;
- int ret = 0;
- uuid_t gfid1 = {0};
- uuid_t gfid2 = {0};
-
- loc_gfid ((loc_t*)&r1->loc, gfid1);
- loc_gfid ((loc_t*)&r2->loc, gfid2);
- ret = gf_uuid_compare (gfid1, gfid2);
- /*Entrylks with NULL basename are the 'smallest'*/
- if (ret == 0) {
- if (!r1->basename)
- return -1;
- if (!r2->basename)
- return 1;
- ret = strcmp (r1->basename, r2->basename);
- }
-
- if (ret <= 0)
- return -1;
- else
- return 1;
-}
-
-int afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index);
-
-static int
-afr_copy_locked_nodes (call_frame_t *frame, xlator_t *this);
-
-static uint64_t afr_lock_number = 1;
-
-static uint64_t
-get_afr_lock_number ()
-{
- return (++afr_lock_number);
-}
-
-int
-afr_set_lock_number (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_internal_lock_t *int_lock = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- int_lock->lock_number = get_afr_lock_number ();
-
- return 0;
-}
+#define LOCKED_NO 0x0 /* no lock held */
+#define LOCKED_YES 0x1 /* for DATA, METADATA, ENTRY and higher_path */
+#define LOCKED_LOWER 0x2 /* for lower path */
void
-afr_set_lk_owner (call_frame_t *frame, xlator_t *this, void *lk_owner)
-{
- gf_log (this->name, GF_LOG_TRACE,
- "Setting lk-owner=%llu",
- (unsigned long long) (unsigned long)lk_owner);
-
- set_lk_owner_from_ptr (&frame->root->lk_owner, lk_owner);
-}
-
-static int
-is_afr_lock_selfheal (afr_local_t *local)
-{
- afr_internal_lock_t *int_lock = NULL;
- int ret = -1;
-
- int_lock = &local->internal_lock;
-
- switch (int_lock->selfheal_lk_type) {
- case AFR_DATA_SELF_HEAL_LK:
- case AFR_METADATA_SELF_HEAL_LK:
- ret = 1;
- break;
- case AFR_ENTRY_SELF_HEAL_LK:
- ret = 0;
- break;
- }
-
- return ret;
-
-}
-
-int32_t
-internal_lock_count (call_frame_t *frame, xlator_t *this)
+afr_lockee_cleanup(afr_lockee_t *lockee)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int32_t call_count = 0;
- int i = 0;
+ if (lockee->fd) {
+ fd_unref(lockee->fd);
+ lockee->fd = NULL;
+ } else {
+ loc_wipe(&lockee->loc);
+ }
- local = frame->local;
- priv = this->private;
+ GF_FREE(lockee->basename);
+ lockee->basename = NULL;
+ GF_FREE(lockee->locked_nodes);
+ lockee->locked_nodes = NULL;
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i])
- ++call_count;
- }
-
- return call_count;
-}
-
-static void
-afr_print_inodelk (char *str, int size, int cmd,
- struct gf_flock *flock, gf_lkowner_t *owner)
-{
- char *cmd_str = NULL;
- char *type_str = NULL;
-
- switch (cmd) {
-#if F_GETLK != F_GETLK64
- case F_GETLK64:
-#endif
- case F_GETLK:
- cmd_str = "GETLK";
- break;
-
-#if F_SETLK != F_SETLK64
- case F_SETLK64:
-#endif
- case F_SETLK:
- cmd_str = "SETLK";
- break;
-
-#if F_SETLKW != F_SETLKW64
- case F_SETLKW64:
-#endif
- case F_SETLKW:
- cmd_str = "SETLKW";
- break;
-
- default:
- cmd_str = "<null>";
- break;
- }
-
- switch (flock->l_type) {
- case F_RDLCK:
- type_str = "READ";
- break;
- case F_WRLCK:
- type_str = "WRITE";
- break;
- case F_UNLCK:
- type_str = "UNLOCK";
- break;
- default:
- type_str = "UNKNOWN";
- break;
- }
-
- snprintf (str, size, "lock=INODELK, cmd=%s, type=%s, "
- "start=%llu, len=%llu, pid=%llu, lk-owner=%s",
- cmd_str, type_str, (unsigned long long) flock->l_start,
- (unsigned long long) flock->l_len,
- (unsigned long long) flock->l_pid,
- lkowner_utoa (owner));
-
-}
-
-static void
-afr_print_lockee (char *str, int size, loc_t *loc, fd_t *fd,
- int child_index)
-{
- snprintf (str, size, "path=%s, fd=%p, child=%d",
- loc->path ? loc->path : "<nul>",
- fd ? fd : NULL,
- child_index);
+ return;
}
void
-afr_print_entrylk (char *str, int size, const char *basename,
- gf_lkowner_t *owner)
-{
- snprintf (str, size, "Basename=%s, lk-owner=%s",
- basename ? basename : "<nul>",
- lkowner_utoa (owner));
-}
-
-static void
-afr_print_verdict (int op_ret, int op_errno, char *str)
+afr_lockees_cleanup(afr_internal_lock_t *int_lock)
{
- if (op_ret < 0) {
- if (op_errno == EAGAIN)
- strcpy (str, "EAGAIN");
- else
- strcpy (str, "FAILED");
- }
- else
- strcpy (str, "GRANTED");
-}
+ int i = 0;
-static void
-afr_set_lock_call_type (afr_lock_call_type_t lock_call_type,
- char *lock_call_type_str,
- afr_internal_lock_t *int_lock)
-{
- switch (lock_call_type) {
- case AFR_INODELK_TRANSACTION:
- if (int_lock->transaction_lk_type == AFR_TRANSACTION_LK)
- strcpy (lock_call_type_str, "AFR_INODELK_TRANSACTION");
- else
- strcpy (lock_call_type_str, "AFR_INODELK_SELFHEAL");
- break;
- case AFR_INODELK_NB_TRANSACTION:
- if (int_lock->transaction_lk_type == AFR_TRANSACTION_LK)
- strcpy (lock_call_type_str, "AFR_INODELK_NB_TRANSACTION");
- else
- strcpy (lock_call_type_str, "AFR_INODELK_NB_SELFHEAL");
- break;
- case AFR_ENTRYLK_TRANSACTION:
- if (int_lock->transaction_lk_type == AFR_TRANSACTION_LK)
- strcpy (lock_call_type_str, "AFR_ENTRYLK_TRANSACTION");
- else
- strcpy (lock_call_type_str, "AFR_ENTRYLK_SELFHEAL");
- break;
- case AFR_ENTRYLK_NB_TRANSACTION:
- if (int_lock->transaction_lk_type == AFR_TRANSACTION_LK)
- strcpy (lock_call_type_str, "AFR_ENTRYLK_NB_TRANSACTION");
- else
- strcpy (lock_call_type_str, "AFR_ENTRYLK_NB_SELFHEAL");
- break;
- default:
- strcpy (lock_call_type_str, "UNKNOWN");
- break;
- }
+ for (i = 0; i < int_lock->lockee_count; i++) {
+ afr_lockee_cleanup(&int_lock->lockee[i]);
+ }
+ return;
}
-
-static void
-afr_trace_inodelk_out (call_frame_t *frame, xlator_t *this,
- afr_lock_call_type_t lock_call_type,
- afr_lock_op_type_t lk_op_type, struct gf_flock *flock,
- int op_ret, int op_errno, int32_t child_index)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
-
- char lockee[256];
- char lock_call_type_str[256];
- char verdict[16];
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- afr_print_lockee (lockee, 256, &local->loc, local->fd, child_index);
-
- afr_set_lock_call_type (lock_call_type, lock_call_type_str, int_lock);
-
- afr_print_verdict (op_ret, op_errno, verdict);
-
- gf_log (this->name, GF_LOG_INFO,
- "[%s %s] [%s] lk-owner=%s Lockee={%s} Number={%llu}",
- lock_call_type_str,
- lk_op_type == AFR_LOCK_OP ? "LOCK REPLY" : "UNLOCK REPLY",
- verdict, lkowner_utoa (&frame->root->lk_owner), lockee,
- (unsigned long long) int_lock->lock_number);
-
-}
-
-static void
-afr_trace_inodelk_in (call_frame_t *frame, xlator_t *this,
- afr_lock_call_type_t lock_call_type,
- afr_lock_op_type_t lk_op_type, struct gf_flock *flock,
- int32_t cmd, int32_t child_index)
-{
- afr_local_t *local = NULL;
- afr_internal_lock_t *int_lock = NULL;
-
- char lock[256];
- char lockee[256];
- char lock_call_type_str[256];
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- afr_print_inodelk (lock, 256, cmd, flock, &frame->root->lk_owner);
- afr_print_lockee (lockee, 256, &local->loc, local->fd, child_index);
-
- afr_set_lock_call_type (lock_call_type, lock_call_type_str, int_lock);
-
- gf_log (this->name, GF_LOG_INFO,
- "[%s %s] Lock={%s} Lockee={%s} Number={%llu}",
- lock_call_type_str,
- lk_op_type == AFR_LOCK_OP ? "LOCK REQUEST" : "UNLOCK REQUEST",
- lock, lockee,
- (unsigned long long) int_lock->lock_number);
-
+int
+afr_entry_lockee_cmp(const void *l1, const void *l2)
+{
+ const afr_lockee_t *r1 = l1;
+ const afr_lockee_t *r2 = l2;
+ int ret = 0;
+ uuid_t gfid1 = {0};
+ uuid_t gfid2 = {0};
+
+ loc_gfid((loc_t *)&r1->loc, gfid1);
+ loc_gfid((loc_t *)&r2->loc, gfid2);
+ ret = gf_uuid_compare(gfid1, gfid2);
+ /*Entrylks with NULL basename are the 'smallest'*/
+ if (ret == 0) {
+ if (!r1->basename)
+ return -1;
+ if (!r2->basename)
+ return 1;
+ ret = strcmp(r1->basename, r2->basename);
+ }
+
+ if (ret <= 0)
+ return -1;
+ else
+ return 1;
}
-static void
-afr_trace_entrylk_in (call_frame_t *frame, xlator_t *this,
- afr_lock_call_type_t lock_call_type,
- afr_lock_op_type_t lk_op_type, const char *basename,
- int32_t cookie)
-{
- afr_local_t *local = NULL;
- afr_internal_lock_t *int_lock = NULL;
- afr_private_t *priv = NULL;
- int child_index = 0;
- int lockee_no = 0;
-
- char lock[256];
- char lockee[256];
- char lock_call_type_str[256];
-
- local = frame->local;
- int_lock = &local->internal_lock;
- priv = this->private;
-
- if (!priv->entrylk_trace) {
- return;
- }
- lockee_no = cookie / priv->child_count;
- child_index = cookie % priv->child_count;
-
- afr_print_entrylk (lock, 256, basename, &frame->root->lk_owner);
- afr_print_lockee (lockee, 256, &int_lock->lockee[lockee_no].loc, local->fd,
- child_index);
-
- afr_set_lock_call_type (lock_call_type, lock_call_type_str, int_lock);
-
- gf_log (this->name, GF_LOG_INFO,
- "[%s %s] Lock={%s} Lockee={%s} Number={%llu}, Cookie={%d}",
- lock_call_type_str,
- lk_op_type == AFR_LOCK_OP ? "LOCK REQUEST" : "UNLOCK REQUEST",
- lock, lockee,
- (unsigned long long) int_lock->lock_number,
- cookie);
-}
+int
+afr_lock_blocking(call_frame_t *frame, xlator_t *this, int child_index);
-static void
-afr_trace_entrylk_out (call_frame_t *frame, xlator_t *this,
- afr_lock_call_type_t lock_call_type,
- afr_lock_op_type_t lk_op_type, const char *basename,
- int op_ret, int op_errno, int32_t cookie)
+void
+afr_set_lk_owner(call_frame_t *frame, xlator_t *this, void *lk_owner)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int lockee_no = 0;
- int child_index = 0;
-
- char lock[256];
- char lockee[256];
- char lock_call_type_str[256];
- char verdict[16];
-
- local = frame->local;
- int_lock = &local->internal_lock;
- priv = this->private;
-
- if (!priv->entrylk_trace) {
- return;
- }
- lockee_no = cookie / priv->child_count;
- child_index = cookie % priv->child_count;
-
- afr_print_entrylk (lock, 256, basename, &frame->root->lk_owner);
- afr_print_lockee (lockee, 256, &int_lock->lockee[lockee_no].loc, local->fd,
- child_index);
-
- afr_set_lock_call_type (lock_call_type, lock_call_type_str, int_lock);
-
- afr_print_verdict (op_ret, op_errno, verdict);
-
- gf_log (this->name, GF_LOG_INFO,
- "[%s %s] [%s] Lock={%s} Lockee={%s} Number={%llu} Cookie={%d}",
- lock_call_type_str,
- lk_op_type == AFR_LOCK_OP ? "LOCK REPLY" : "UNLOCK REPLY",
- verdict,
- lock, lockee,
- (unsigned long long) int_lock->lock_number,
- cookie);
+ gf_msg_trace(this->name, 0, "Setting lk-owner=%llu",
+ (unsigned long long)(unsigned long)lk_owner);
+ set_lk_owner_from_ptr(&frame->root->lk_owner, lk_owner);
}
-static int
-transaction_lk_op (afr_local_t *local)
+int32_t
+internal_lock_count(call_frame_t *frame, xlator_t *this)
{
- afr_internal_lock_t *int_lock = NULL;
- int ret = -1;
-
- int_lock = &local->internal_lock;
-
- if (int_lock->transaction_lk_type == AFR_TRANSACTION_LK) {
- gf_log (THIS->name, GF_LOG_DEBUG,
- "lk op is for a transaction");
- ret = 1;
- }
- else if (int_lock->transaction_lk_type == AFR_SELFHEAL_LK) {
- gf_log (THIS->name, GF_LOG_DEBUG,
- "lk op is for a self heal");
-
- ret = 0;
- }
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int32_t call_count = 0;
+ int i = 0;
- if (ret == -1)
- gf_log (THIS->name, GF_LOG_DEBUG,
- "lk op is not set");
+ local = frame->local;
+ priv = this->private;
- return ret;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i])
+ ++call_count;
+ }
+ return call_count;
}
-static int
-is_afr_lock_transaction (afr_local_t *local)
+int
+afr_add_entry_lockee(afr_local_t *local, loc_t *loc, char *basename,
+ int child_count)
{
- int ret = 0;
+ int ret = -ENOMEM;
+ afr_internal_lock_t *int_lock = &local->internal_lock;
+ afr_lockee_t *lockee = &int_lock->lockee[int_lock->lockee_count];
- switch (local->transaction.type) {
- case AFR_DATA_TRANSACTION:
- case AFR_METADATA_TRANSACTION:
- ret = 1;
- break;
+ GF_ASSERT(int_lock->lockee_count < AFR_LOCKEE_COUNT_MAX);
+ loc_copy(&lockee->loc, loc);
+ lockee->basename = (basename) ? gf_strdup(basename) : NULL;
+ if (basename && !lockee->basename)
+ goto out;
- case AFR_ENTRY_RENAME_TRANSACTION:
- case AFR_ENTRY_TRANSACTION:
- ret = 0;
- break;
+ lockee->locked_count = 0;
+ lockee->locked_nodes = GF_CALLOC(child_count, sizeof(*lockee->locked_nodes),
+ gf_afr_mt_afr_node_character);
- }
+ if (!lockee->locked_nodes)
+ goto out;
- return ret;
+ ret = 0;
+ int_lock->lockee_count++;
+out:
+ if (ret) {
+ afr_lockee_cleanup(lockee);
+ }
+ return ret;
}
int
-afr_init_entry_lockee (afr_entry_lockee_t *lockee, afr_local_t *local,
- loc_t *loc, char *basename, int child_count)
+afr_add_inode_lockee(afr_local_t *local, int child_count)
{
- int ret = -1;
+ int ret = -ENOMEM;
+ afr_internal_lock_t *int_lock = &local->internal_lock;
+ afr_lockee_t *lockee = &int_lock->lockee[int_lock->lockee_count];
- loc_copy (&lockee->loc, loc);
- lockee->basename = (basename)? gf_strdup (basename): NULL;
- if (basename && !lockee->basename)
- goto out;
+ if (local->fd) {
+ lockee->fd = fd_ref(local->fd);
+ } else {
+ loc_copy(&lockee->loc, &local->loc);
+ }
- lockee->locked_count = 0;
- lockee->locked_nodes = GF_CALLOC (child_count,
- sizeof (*lockee->locked_nodes),
- gf_afr_mt_afr_node_character);
+ lockee->locked_count = 0;
+ lockee->locked_nodes = GF_CALLOC(child_count, sizeof(*lockee->locked_nodes),
+ gf_afr_mt_afr_node_character);
- if (!lockee->locked_nodes)
- goto out;
+ if (!lockee->locked_nodes)
+ goto out;
- ret = 0;
+ ret = 0;
+ int_lock->lockee_count++;
out:
- return ret;
-
-}
-
-void
-afr_entry_lockee_cleanup (afr_internal_lock_t *int_lock)
-{
- int i = 0;
-
- for (i = 0; i < int_lock->lockee_count; i++) {
- loc_wipe (&int_lock->lockee[i].loc);
- if (int_lock->lockee[i].basename)
- GF_FREE (int_lock->lockee[i].basename);
- if (int_lock->lockee[i].locked_nodes)
- GF_FREE (int_lock->lockee[i].locked_nodes);
- }
-
- return;
-}
-
-static int
-initialize_entrylk_variables (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_internal_lock_t *int_lock = NULL;
- afr_private_t *priv = NULL;
-
- int i = 0;
-
- priv = this->private;
- local = frame->local;
- int_lock = &local->internal_lock;
-
- int_lock->entrylk_lock_count = 0;
- int_lock->lock_op_ret = -1;
- int_lock->lock_op_errno = 0;
-
- for (i = 0; i < AFR_LOCKEE_COUNT_MAX; i++) {
- if (!int_lock->lockee[i].locked_nodes)
- break;
- int_lock->lockee[i].locked_count = 0;
- memset (int_lock->lockee[i].locked_nodes, 0,
- sizeof (*int_lock->lockee[i].locked_nodes) *
- priv->child_count);
- }
-
- return 0;
+ if (ret) {
+ afr_lockee_cleanup(lockee);
+ }
+ return ret;
}
static int
-initialize_inodelk_variables (call_frame_t *frame, xlator_t *this)
+initialize_internal_lock_variables(call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_internal_lock_t *int_lock = NULL;
- afr_private_t *priv = NULL;
- afr_inodelk_t *inodelk = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ afr_private_t *priv = NULL;
- priv = this->private;
- local = frame->local;
- int_lock = &local->internal_lock;
+ int i = 0;
- inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+ priv = this->private;
+ local = frame->local;
+ int_lock = &local->internal_lock;
- inodelk->lock_count = 0;
- int_lock->lk_attempted_count = 0;
- int_lock->lock_op_ret = -1;
- int_lock->lock_op_errno = 0;
+ int_lock->lock_count = 0;
+ int_lock->lock_op_ret = -1;
+ int_lock->lock_op_errno = 0;
+ int_lock->lk_attempted_count = 0;
- memset (inodelk->locked_nodes, 0,
- sizeof (*inodelk->locked_nodes) * priv->child_count);
- memset (int_lock->locked_nodes, 0,
- sizeof (*int_lock->locked_nodes) * priv->child_count);
+ for (i = 0; i < AFR_LOCKEE_COUNT_MAX; i++) {
+ if (!int_lock->lockee[i].locked_nodes)
+ break;
+ int_lock->lockee[i].locked_count = 0;
+ memset(int_lock->lockee[i].locked_nodes, 0,
+ sizeof(*int_lock->lockee[i].locked_nodes) * priv->child_count);
+ }
- return 0;
+ return 0;
}
int
-afr_lockee_locked_nodes_count (afr_internal_lock_t *int_lock)
+afr_lockee_locked_nodes_count(afr_internal_lock_t *int_lock)
{
- int call_count = 0;
- int i = 0;
+ int call_count = 0;
+ int i = 0;
- for (i = 0; i < int_lock->lockee_count; i++)
- call_count += int_lock->lockee[i].locked_count;
+ for (i = 0; i < int_lock->lockee_count; i++)
+ call_count += int_lock->lockee[i].locked_count;
- return call_count;
+ return call_count;
}
int
-afr_locked_nodes_count (unsigned char *locked_nodes, int child_count)
+afr_locked_nodes_count(unsigned char *locked_nodes, int child_count)
{
- int i = 0;
- int call_count = 0;
+ int i = 0;
+ int call_count = 0;
- for (i = 0; i < child_count; i++) {
- if (locked_nodes[i] & LOCKED_YES)
- call_count++;
- }
+ for (i = 0; i < child_count; i++) {
+ if (locked_nodes[i] & LOCKED_YES)
+ call_count++;
+ }
- return call_count;
+ return call_count;
}
-/* FIXME: What if UNLOCK fails */
-static int32_t
-afr_unlock_common_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+static void
+afr_log_locks_failure(call_frame_t *frame, char *where, char *what,
+ int op_errno)
{
- afr_local_t *local = NULL;
- afr_internal_lock_t *int_lock = NULL;
- int call_count = 0;
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- LOCK (&frame->lock);
- {
- call_count = --int_lock->lk_call_count;
- }
- UNLOCK (&frame->lock);
+ xlator_t *this = frame->this;
+ gf_lkowner_t *lk_owner = &frame->root->lk_owner;
+ afr_local_t *local = frame->local;
+ const char *fop = NULL;
+ char *gfid = NULL;
+ const char *name = NULL;
- if (call_count == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "All internal locks unlocked");
- int_lock->lock_cbk (frame, this);
- }
+ fop = gf_fop_list[local->op];
- return 0;
+ switch (local->transaction.type) {
+ case AFR_ENTRY_RENAME_TRANSACTION:
+ case AFR_ENTRY_TRANSACTION:
+ switch (local->op) {
+ case GF_FOP_LINK:
+ gfid = uuid_utoa(local->newloc.pargfid);
+ name = local->newloc.name;
+ break;
+ default:
+ gfid = uuid_utoa(local->loc.pargfid);
+ name = local->loc.name;
+ break;
+ }
+ gf_msg(this->name, GF_LOG_WARNING, op_errno,
+ AFR_MSG_INTERNAL_LKS_FAILED,
+ "Unable to do entry %s with lk-owner:%s on %s "
+ "while attempting %s on {pgfid:%s, name:%s}.",
+ what, lkowner_utoa(lk_owner), where, fop, gfid, name);
+ break;
+ case AFR_DATA_TRANSACTION:
+ case AFR_METADATA_TRANSACTION:
+ gfid = uuid_utoa(local->inode->gfid);
+ gf_msg(this->name, GF_LOG_WARNING, op_errno,
+ AFR_MSG_INTERNAL_LKS_FAILED,
+ "Unable to do inode %s with lk-owner:%s on %s "
+ "while attempting %s on gfid:%s.",
+ what, lkowner_utoa(lk_owner), where, fop, gfid);
+ break;
+ }
}
static int32_t
-afr_unlock_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+afr_unlock_common_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_internal_lock_t *int_lock = NULL;
- afr_inodelk_t *inodelk = NULL;
- int32_t child_index = (long)cookie;
- afr_private_t *priv = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- AFR_TRACE_INODELK_OUT (frame, this, AFR_INODELK_TRANSACTION,
- AFR_UNLOCK_OP, NULL, op_ret,
- op_errno, child_index);
-
- priv = this->private;
-
- if (op_ret < 0 && op_errno != ENOTCONN && op_errno != EBADFD) {
- gf_msg (this->name, GF_LOG_ERROR, op_errno,
- AFR_MSG_INODE_UNLOCK_FAIL,
- "path=%s gfid=%s: unlock failed on subvolume %s "
- "with lock owner %s", local->loc.path,
- loc_gfid_utoa (&(local->loc)),
- priv->children[child_index]->name,
- lkowner_utoa (&frame->root->lk_owner));
- }
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ int lockee_num = 0;
+ int call_count = 0;
+ int child_index = 0;
+ int ret = 0;
+ local = frame->local;
+ int_lock = &local->internal_lock;
+ priv = this->private;
+ lockee_num = (int)((long)cookie) / priv->child_count;
+ child_index = (int)((long)cookie) % priv->child_count;
- inodelk = afr_get_inodelk (int_lock, int_lock->domain);
- inodelk->locked_nodes[child_index] &= LOCKED_NO;
- if (local->transaction.eager_lock)
- local->transaction.eager_lock[child_index] = 0;
+ if (op_ret < 0 && op_errno != ENOTCONN && op_errno != EBADFD) {
+ afr_log_locks_failure(frame, priv->children[child_index]->name,
+ "unlock", op_errno);
+ }
- afr_unlock_common_cbk (frame, cookie, this, op_ret, op_errno, xdata);
+ int_lock->lockee[lockee_num].locked_nodes[child_index] &= LOCKED_NO;
+ if (local->transaction.type == AFR_DATA_TRANSACTION && op_ret != 1)
+ ret = afr_write_subvol_reset(frame, this);
- return 0;
+ LOCK(&frame->lock);
+ {
+ call_count = --int_lock->lk_call_count;
+ }
+ UNLOCK(&frame->lock);
-}
+ if (call_count == 0) {
+ int_lock->lock_cbk(frame, this);
+ }
-static int
-afr_unlock_inodelk (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_inodelk_t *inodelk = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- struct gf_flock flock = {0,};
- struct gf_flock full_flock = {0,};
- struct gf_flock *flock_use = NULL;
- int call_count = 0;
- int i = 0;
- int piggyback = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
-
-
- local = frame->local;
- int_lock = &local->internal_lock;
- priv = this->private;
-
- inodelk = afr_get_inodelk (int_lock, int_lock->domain);
-
- flock.l_start = inodelk->flock.l_start;
- flock.l_len = inodelk->flock.l_len;
- flock.l_type = F_UNLCK;
-
- full_flock.l_type = F_UNLCK;
- call_count = afr_locked_nodes_count (inodelk->locked_nodes,
- priv->child_count);
-
- int_lock->lk_call_count = call_count;
-
- if (!call_count) {
- gf_log (this->name, GF_LOG_TRACE,
- "No internal locks unlocked");
- int_lock->lock_cbk (frame, this);
- goto out;
- }
-
- if (local->fd)
- fd_ctx = afr_fd_ctx_get (local->fd, this);
-
- for (i = 0; i < priv->child_count; i++) {
- if ((inodelk->locked_nodes[i] & LOCKED_YES) != LOCKED_YES)
- continue;
-
- if (local->fd) {
- flock_use = &flock;
- if (!local->transaction.eager_lock[i]) {
- goto wind;
- }
-
- piggyback = 0;
-
- LOCK (&local->fd->lock);
- {
- if (fd_ctx->lock_piggyback[i]) {
- fd_ctx->lock_piggyback[i]--;
- piggyback = 1;
- } else {
- fd_ctx->lock_acquired[i]--;
- }
- }
- UNLOCK (&local->fd->lock);
-
- if (piggyback) {
- afr_unlock_inodelk_cbk (frame, (void *) (long) i,
- this, 1, 0, NULL);
- if (!--call_count)
- break;
- continue;
- }
-
- flock_use = &full_flock;
- wind:
- AFR_TRACE_INODELK_IN (frame, this,
- AFR_INODELK_TRANSACTION,
- AFR_UNLOCK_OP, flock_use, F_SETLK,
- i);
-
- STACK_WIND_COOKIE (frame, afr_unlock_inodelk_cbk,
- (void *) (long)i,
- priv->children[i],
- priv->children[i]->fops->finodelk,
- int_lock->domain, local->fd,
- F_SETLK, flock_use, NULL);
-
- if (!--call_count)
- break;
-
- } else {
- AFR_TRACE_INODELK_IN (frame, this,
- AFR_INODELK_TRANSACTION,
- AFR_UNLOCK_OP, &flock, F_SETLK, i);
-
- STACK_WIND_COOKIE (frame, afr_unlock_inodelk_cbk,
- (void *) (long)i,
- priv->children[i],
- priv->children[i]->fops->inodelk,
- int_lock->domain, &local->loc,
- F_SETLK, &flock, NULL);
-
- if (!--call_count)
- break;
- }
- }
-out:
- return 0;
+ return ret;
}
-static int32_t
-afr_unlock_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- afr_internal_lock_t *int_lock = NULL;
- int32_t child_index = 0;
- int lockee_no = 0;
-
- priv = this->private;
- lockee_no = (int)((long) cookie) / priv->child_count;
- child_index = (int) ((long) cookie) % priv->child_count;
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- AFR_TRACE_ENTRYLK_OUT (frame, this, AFR_ENTRYLK_TRANSACTION,
- AFR_UNLOCK_OP,
- int_lock->lockee[lockee_no].basename, op_ret,
- op_errno, (int) ((long)cookie));
-
- if (op_ret < 0) {
- gf_msg (this->name, GF_LOG_ERROR, op_errno,
- AFR_MSG_ENTRY_UNLOCK_FAIL,
- "%s: unlock failed on %s", local->loc.path,
- priv->children[child_index]->name);
- }
-
- int_lock->lockee[lockee_no].locked_nodes[child_index] &= LOCKED_NO;
- afr_unlock_common_cbk (frame, cookie, this, op_ret, op_errno, NULL);
+void
+afr_internal_lock_wind(call_frame_t *frame,
+ int32_t (*cbk)(call_frame_t *, void *, xlator_t *,
+ int32_t, int32_t, dict_t *),
+ void *cookie, int child, int lockee_num,
+ gf_boolean_t blocking, gf_boolean_t unlock)
+{
+ afr_local_t *local = frame->local;
+ xlator_t *this = frame->this;
+ afr_private_t *priv = this->private;
+ afr_internal_lock_t *int_lock = &local->internal_lock;
+ entrylk_cmd cmd = ENTRYLK_LOCK_NB;
+ int32_t cmd1 = F_SETLK;
+ struct gf_flock flock = {
+ 0,
+ };
+
+ switch (local->transaction.type) {
+ case AFR_ENTRY_TRANSACTION:
+ case AFR_ENTRY_RENAME_TRANSACTION:
+ if (unlock) {
+ cmd = ENTRYLK_UNLOCK;
+ } else if (blocking) { /*Doesn't make sense to have blocking
+ unlock*/
+ cmd = ENTRYLK_LOCK;
+ }
+
+ if (local->fd) {
+ STACK_WIND_COOKIE(frame, cbk, cookie, priv->children[child],
+ priv->children[child]->fops->fentrylk,
+ int_lock->domain,
+ int_lock->lockee[lockee_num].fd,
+ int_lock->lockee[lockee_num].basename, cmd,
+ ENTRYLK_WRLCK, NULL);
+ } else {
+ STACK_WIND_COOKIE(frame, cbk, cookie, priv->children[child],
+ priv->children[child]->fops->entrylk,
+ int_lock->domain,
+ &int_lock->lockee[lockee_num].loc,
+ int_lock->lockee[lockee_num].basename, cmd,
+ ENTRYLK_WRLCK, NULL);
+ }
+ break;
- return 0;
+ case AFR_DATA_TRANSACTION:
+ case AFR_METADATA_TRANSACTION:
+ flock = int_lock->lockee[lockee_num].flock;
+ if (unlock) {
+ flock.l_type = F_UNLCK;
+ } else if (blocking) { /*Doesn't make sense to have blocking
+ unlock*/
+ cmd1 = F_SETLKW;
+ }
+
+ if (local->fd) {
+ STACK_WIND_COOKIE(
+ frame, cbk, cookie, priv->children[child],
+ priv->children[child]->fops->finodelk, int_lock->domain,
+ int_lock->lockee[lockee_num].fd, cmd1, &flock, NULL);
+ } else {
+ STACK_WIND_COOKIE(
+ frame, cbk, cookie, priv->children[child],
+ priv->children[child]->fops->inodelk, int_lock->domain,
+ &int_lock->lockee[lockee_num].loc, cmd1, &flock, NULL);
+ }
+ break;
+ }
}
static int
-afr_unlock_entrylk (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int index = 0;
- int lockee_no = 0;
- int copies = 0;
- int i = -1;
-
- local = frame->local;
- int_lock = &local->internal_lock;
- priv = this->private;
- copies = priv->child_count;
-
- call_count = afr_lockee_locked_nodes_count (int_lock);
-
- int_lock->lk_call_count = call_count;
-
- if (!call_count){
- gf_log (this->name, GF_LOG_TRACE,
- "No internal locks unlocked");
- int_lock->lock_cbk (frame, this);
- goto out;
- }
-
- for (i = 0; i < int_lock->lockee_count * priv->child_count; i++) {
- lockee_no = i / copies;
- index = i % copies;
- if (int_lock->lockee[lockee_no].locked_nodes[index] & LOCKED_YES) {
- AFR_TRACE_ENTRYLK_IN (frame, this, AFR_ENTRYLK_NB_TRANSACTION,
- AFR_UNLOCK_OP,
- int_lock->lockee[lockee_no].basename,
- i);
-
- STACK_WIND_COOKIE (frame, afr_unlock_entrylk_cbk,
- (void *) (long) i,
- priv->children[index],
- priv->children[index]->fops->entrylk,
- int_lock->domain,
- &int_lock->lockee[lockee_no].loc,
- int_lock->lockee[lockee_no].basename,
- ENTRYLK_UNLOCK, ENTRYLK_WRLCK, NULL);
-
- if (!--call_count)
- break;
- }
+afr_unlock_now(call_frame_t *frame, xlator_t *this)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = 0;
+ int child_index = 0;
+ int lockee_num = 0;
+ int i = -1;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+ priv = this->private;
+
+ call_count = afr_lockee_locked_nodes_count(int_lock);
+
+ int_lock->lk_call_count = call_count;
+
+ if (!call_count) {
+ gf_msg_trace(this->name, 0, "No internal locks unlocked");
+ int_lock->lock_cbk(frame, this);
+ goto out;
+ }
+
+ for (i = 0; i < int_lock->lockee_count * priv->child_count; i++) {
+ lockee_num = i / priv->child_count;
+ child_index = i % priv->child_count;
+ if (int_lock->lockee[lockee_num].locked_nodes[child_index] &
+ LOCKED_YES) {
+ afr_internal_lock_wind(frame, afr_unlock_common_cbk,
+ (void *)(long)i, child_index, lockee_num,
+ _gf_false, _gf_true);
+ if (!--call_count)
+ break;
}
+ }
out:
- return 0;
-
+ return 0;
}
static int32_t
-afr_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int cky = (long) cookie;
- int child_index = 0;
- int lockee_no = 0;
-
- priv = this->private;
- local = frame->local;
- int_lock = &local->internal_lock;
-
- child_index = ((int)cky) % priv->child_count;
- lockee_no = ((int)cky) / priv->child_count;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- if (op_errno == ENOSYS) {
- /* return ENOTSUP */
- gf_log (this->name, GF_LOG_ERROR,
- "subvolume does not support locking. "
- "please load features/locks xlator on server");
- local->op_ret = op_ret;
- int_lock->lock_op_ret = op_ret;
- }
-
- local->op_errno = op_errno;
- int_lock->lock_op_errno = op_errno;
- }
-
- int_lock->lk_attempted_count++;
- }
- UNLOCK (&frame->lock);
-
- if ((op_ret == -1) &&
- (op_errno == ENOSYS)) {
- afr_unlock (frame, this);
- } else {
- if (op_ret == 0) {
- if (local->transaction.type == AFR_ENTRY_TRANSACTION ||
- local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION) {
- int_lock->lockee[lockee_no].locked_nodes[child_index] |= LOCKED_YES;
- int_lock->lockee[lockee_no].locked_count++;
- int_lock->entrylk_lock_count++;
- } else {
- int_lock->locked_nodes[child_index] |= LOCKED_YES;
- int_lock->lock_count++;
- }
+afr_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int cky = (long)cookie;
+ int child_index = 0;
+ int lockee_num = 0;
+
+ priv = this->private;
+ local = frame->local;
+ int_lock = &local->internal_lock;
+
+ child_index = ((int)cky) % priv->child_count;
+ lockee_num = ((int)cky) / priv->child_count;
+
+ LOCK(&frame->lock);
+ {
+ if (op_ret == -1) {
+ if (op_errno == ENOSYS) {
+ /* return ENOTSUP */
+ gf_msg(this->name, GF_LOG_ERROR, ENOSYS,
+ AFR_MSG_LOCK_XLATOR_NOT_LOADED,
+ "subvolume does not support locking. "
+ "please load features/locks xlator on server");
+ local->op_ret = op_ret;
+ int_lock->lock_op_ret = op_ret;
+ }
+
+ local->op_errno = op_errno;
+ int_lock->lock_op_errno = op_errno;
+ }
+
+ int_lock->lk_attempted_count++;
+ }
+ UNLOCK(&frame->lock);
+
+ if ((op_ret == -1) && (op_errno == ENOSYS)) {
+ afr_unlock_now(frame, this);
+ } else {
+ if (op_ret == 0) {
+ int_lock->lockee[lockee_num]
+ .locked_nodes[child_index] |= LOCKED_YES;
+ int_lock->lockee[lockee_num].locked_count++;
+ int_lock->lock_count++;
+ if (local->transaction.type == AFR_DATA_TRANSACTION) {
+ LOCK(&local->inode->lock);
+ {
+ local->inode_ctx->lock_count++;
}
- afr_lock_blocking (frame, this, cky + 1);
- }
-
- return 0;
-}
-
-static int32_t
-afr_blocking_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- AFR_TRACE_INODELK_OUT (frame, this, AFR_INODELK_TRANSACTION,
- AFR_LOCK_OP, NULL, op_ret,
- op_errno, (long) cookie);
-
- afr_lock_cbk (frame, cookie, this, op_ret, op_errno, xdata);
- return 0;
-
-}
-
-static int32_t
-afr_blocking_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- AFR_TRACE_ENTRYLK_OUT (frame, this, AFR_ENTRYLK_TRANSACTION,
- AFR_LOCK_OP, NULL, op_ret,
- op_errno, (long)cookie);
-
- afr_lock_cbk (frame, cookie, this, op_ret, op_errno, xdata);
- return 0;
-}
-
-static int
-afr_copy_locked_nodes (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_inodelk_t *inodelk = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- priv = this->private;
- local = frame->local;
- int_lock = &local->internal_lock;
-
- switch (local->transaction.type) {
- case AFR_DATA_TRANSACTION:
- case AFR_METADATA_TRANSACTION:
- inodelk = afr_get_inodelk (int_lock, int_lock->domain);
- memcpy (inodelk->locked_nodes, int_lock->locked_nodes,
- sizeof (*inodelk->locked_nodes) * priv->child_count);
- inodelk->lock_count = int_lock->lock_count;
- break;
-
- case AFR_ENTRY_RENAME_TRANSACTION:
- case AFR_ENTRY_TRANSACTION:
- /*entrylk_count is being used in both non-blocking and blocking
- * modes */
- break;
+ UNLOCK(&local->inode->lock);
+ }
}
+ afr_lock_blocking(frame, this, cky + 1);
+ }
- return 0;
-
+ return 0;
}
-static inline gf_boolean_t
-afr_is_entrylk (afr_internal_lock_t *int_lock,
- afr_transaction_type trans_type)
+static gf_boolean_t
+_is_lock_wind_needed(afr_local_t *local, int child_index)
{
- gf_boolean_t is_entrylk = _gf_false;
-
- if ((int_lock->transaction_lk_type == AFR_SELFHEAL_LK) &&
- int_lock->selfheal_lk_type == AFR_ENTRY_SELF_HEAL_LK) {
+ if (!local->child_up[child_index])
+ return _gf_false;
- is_entrylk = _gf_true;
-
- } else if ((int_lock->transaction_lk_type == AFR_TRANSACTION_LK) &&
- (trans_type == AFR_ENTRY_TRANSACTION ||
- trans_type == AFR_ENTRY_RENAME_TRANSACTION)) {
-
- is_entrylk = _gf_true;
-
- } else {
- is_entrylk = _gf_false;
- }
-
- return is_entrylk;
+ return _gf_true;
}
static gf_boolean_t
-_is_lock_wind_needed (afr_local_t *local, int child_index)
-{
- if (!local->child_up[child_index])
- return _gf_false;
-
- return _gf_true;
+is_blocking_locks_count_sufficient(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ int child = 0;
+ int nlockee = 0;
+ int lockee_count = 0;
+ gf_boolean_t ret = _gf_true;
+
+ local = frame->local;
+ priv = this->private;
+ int_lock = &local->internal_lock;
+ lockee_count = int_lock->lockee_count;
+
+ if (int_lock->lock_count == 0) {
+ afr_log_locks_failure(frame, "any subvolume", "lock",
+ int_lock->lock_op_errno);
+ return _gf_false;
+ }
+ /* For FOPS that take multiple sets of locks (mkdir, rename),
+ * there must be at least one brick on which the locks from
+ * all lock sets were successful. */
+ for (child = 0; child < priv->child_count; child++) {
+ ret = _gf_true;
+ for (nlockee = 0; nlockee < lockee_count; nlockee++) {
+ if (!(int_lock->lockee[nlockee].locked_nodes[child] & LOCKED_YES))
+ ret = _gf_false;
+ }
+ if (ret)
+ return ret;
+ }
+ if (!ret)
+ afr_log_locks_failure(frame, "all", "lock", int_lock->lock_op_errno);
+
+ return ret;
}
int
-afr_lock_blocking (call_frame_t *frame, xlator_t *this, int cookie)
+afr_lock_blocking(call_frame_t *frame, xlator_t *this, int cookie)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_inodelk_t *inodelk = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- struct gf_flock flock = {0,};
- uint64_t ctx = 0;
- int ret = 0;
- int child_index = 0;
- int lockee_no = 0;
- gf_boolean_t is_entrylk = _gf_false;
-
- local = frame->local;
- int_lock = &local->internal_lock;
- priv = this->private;
- child_index = cookie % priv->child_count;
- lockee_no = cookie / priv->child_count;
- is_entrylk = afr_is_entrylk (int_lock, local->transaction.type);
-
-
- if (!is_entrylk) {
- inodelk = afr_get_inodelk (int_lock, int_lock->domain);
- flock.l_start = inodelk->flock.l_start;
- flock.l_len = inodelk->flock.l_len;
- flock.l_type = inodelk->flock.l_type;
- }
-
- if (local->fd) {
- ret = fd_ctx_get (local->fd, this, &ctx);
-
- if (ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "unable to get fd ctx for fd=%p",
- local->fd);
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ uint64_t ctx = 0;
+ int ret = 0;
+ int child_index = 0;
+ int lockee_num = 0;
- local->op_ret = -1;
- int_lock->lock_op_ret = -1;
+ local = frame->local;
+ int_lock = &local->internal_lock;
+ priv = this->private;
+ child_index = cookie % priv->child_count;
+ lockee_num = cookie / priv->child_count;
- afr_copy_locked_nodes (frame, this);
+ if (local->fd) {
+ ret = fd_ctx_get(local->fd, this, &ctx);
- afr_unlock (frame, this);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_FD_CTX_GET_FAILED,
+ "unable to get fd ctx for fd=%p", local->fd);
- return 0;
- }
- }
-
- if (int_lock->lk_expected_count == int_lock->lk_attempted_count) {
- if ((is_entrylk && int_lock->entrylk_lock_count == 0) ||
- (!is_entrylk && int_lock->lock_count == 0)) {
- gf_log (this->name, GF_LOG_INFO,
- "unable to lock on even one child");
-
- local->op_ret = -1;
- int_lock->lock_op_ret = -1;
-
- afr_copy_locked_nodes (frame, this);
+ local->op_ret = -1;
+ int_lock->lock_op_ret = -1;
- afr_unlock(frame, this);
+ afr_unlock_now(frame, this);
- return 0;
- }
+ return 0;
}
+ }
- if (int_lock->lk_expected_count == int_lock->lk_attempted_count) {
- /* we're done locking */
-
- gf_log (this->name, GF_LOG_DEBUG,
- "we're done locking");
+ if (int_lock->lk_expected_count == int_lock->lk_attempted_count) {
+ if (!is_blocking_locks_count_sufficient(frame, this)) {
+ local->op_ret = -1;
+ int_lock->lock_op_ret = -1;
- afr_copy_locked_nodes (frame, this);
+ afr_unlock_now(frame, this);
- int_lock->lock_op_ret = 0;
- int_lock->lock_cbk (frame, this);
- return 0;
+ return 0;
}
+ }
- if (!_is_lock_wind_needed (local, child_index)) {
- afr_lock_blocking (frame, this, cookie + 1);
- return 0;
- }
-
- switch (local->transaction.type) {
- case AFR_DATA_TRANSACTION:
- case AFR_METADATA_TRANSACTION:
+ if (int_lock->lk_expected_count == int_lock->lk_attempted_count) {
+ /* we're done locking */
- if (local->fd) {
- AFR_TRACE_INODELK_IN (frame, this,
- AFR_INODELK_TRANSACTION,
- AFR_LOCK_OP, &flock, F_SETLKW,
- child_index);
-
- STACK_WIND_COOKIE (frame, afr_blocking_inodelk_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->finodelk,
- int_lock->domain, local->fd,
- F_SETLKW, &flock, NULL);
-
- } else {
- AFR_TRACE_INODELK_IN (frame, this,
- AFR_INODELK_TRANSACTION,
- AFR_LOCK_OP, &flock, F_SETLKW,
- child_index);
-
- STACK_WIND_COOKIE (frame, afr_blocking_inodelk_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->inodelk,
- int_lock->domain, &local->loc,
- F_SETLKW, &flock, NULL);
- }
+ gf_msg_debug(this->name, 0, "we're done locking");
- break;
+ int_lock->lock_op_ret = 0;
+ int_lock->lock_cbk(frame, this);
+ return 0;
+ }
- case AFR_ENTRY_RENAME_TRANSACTION:
- case AFR_ENTRY_TRANSACTION:
- /*Accounting for child_index increments on 'down'
- *and 'fd-less' children */
-
- if (local->fd) {
- AFR_TRACE_ENTRYLK_IN (frame, this, AFR_ENTRYLK_TRANSACTION,
- AFR_LOCK_OP,
- int_lock->lockee[lockee_no].basename,
- cookie);
-
- STACK_WIND_COOKIE (frame, afr_blocking_entrylk_cbk,
- (void *) (long) cookie,
- priv->children[child_index],
- priv->children[child_index]->fops->fentrylk,
- int_lock->domain, local->fd,
- int_lock->lockee[lockee_no].basename,
- ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL);
- } else {
- AFR_TRACE_ENTRYLK_IN (frame, this,
- AFR_ENTRYLK_TRANSACTION,
- AFR_LOCK_OP, local->transaction.basename,
- child_index);
-
- STACK_WIND_COOKIE (frame, afr_blocking_entrylk_cbk,
- (void *) (long) cookie,
- priv->children[child_index],
- priv->children[child_index]->fops->entrylk,
- int_lock->domain,
- &int_lock->lockee[lockee_no].loc,
- int_lock->lockee[lockee_no].basename,
- ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL);
- }
+ if (!_is_lock_wind_needed(local, child_index)) {
+ afr_lock_blocking(frame, this, cookie + 1);
+ return 0;
+ }
- break;
- }
+ afr_internal_lock_wind(frame, afr_lock_cbk, (void *)(long)cookie,
+ child_index, lockee_num, _gf_true, _gf_false);
- return 0;
+ return 0;
}
int32_t
-afr_blocking_lock (call_frame_t *frame, xlator_t *this)
+afr_blocking_lock(call_frame_t *frame, xlator_t *this)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int up_count = 0;
-
- priv = this->private;
- local = frame->local;
- int_lock = &local->internal_lock;
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int up_count = 0;
- switch (local->transaction.type) {
- case AFR_DATA_TRANSACTION:
- case AFR_METADATA_TRANSACTION:
- initialize_inodelk_variables (frame, this);
- break;
+ priv = this->private;
+ local = frame->local;
+ int_lock = &local->internal_lock;
- case AFR_ENTRY_RENAME_TRANSACTION:
- case AFR_ENTRY_TRANSACTION:
- up_count = AFR_COUNT (local->child_up, priv->child_count);
- int_lock->lk_call_count = int_lock->lk_expected_count
- = (int_lock->lockee_count *
- up_count);
- initialize_entrylk_variables (frame, this);
- break;
- }
+ up_count = AFR_COUNT(local->child_up, priv->child_count);
+ int_lock->lk_call_count = int_lock->lk_expected_count =
+ (int_lock->lockee_count * up_count);
+ initialize_internal_lock_variables(frame, this);
- afr_lock_blocking (frame, this, 0);
+ afr_lock_blocking(frame, this, 0);
- return 0;
+ return 0;
}
static int32_t
-afr_nonblocking_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- int call_count = 0;
- int child_index = (long) cookie;
- int copies = 0;
- int index = 0;
- int lockee_no = 0;
- afr_private_t *priv = NULL;
-
- priv = this->private;
-
- copies = priv->child_count;
- index = child_index % copies;
- lockee_no = child_index / copies;
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- AFR_TRACE_ENTRYLK_OUT (frame, this, AFR_ENTRYLK_TRANSACTION,
- AFR_LOCK_OP,
- int_lock->lockee[lockee_no].basename, op_ret,
- op_errno, (long) cookie);
-
- LOCK (&frame->lock);
- {
- if (op_ret < 0 ) {
- if (op_errno == ENOSYS) {
- /* return ENOTSUP */
- gf_log (this->name, GF_LOG_ERROR,
- "subvolume does not support locking. "
- "please load features/locks xlator on server");
- local->op_ret = op_ret;
- int_lock->lock_op_ret = op_ret;
-
- int_lock->lock_op_errno = op_errno;
- local->op_errno = op_errno;
- }
- } else if (op_ret == 0) {
- int_lock->lockee[lockee_no].locked_nodes[index] |= \
- LOCKED_YES;
- int_lock->lockee[lockee_no].locked_count++;
- int_lock->entrylk_lock_count++;
- }
-
- call_count = --int_lock->lk_call_count;
- }
- UNLOCK (&frame->lock);
-
- if (call_count == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "Last locking reply received");
- /* all locks successful. Proceed to call FOP */
- if (int_lock->entrylk_lock_count ==
- int_lock->lk_expected_count) {
- gf_log (this->name, GF_LOG_TRACE,
- "All servers locked. Calling the cbk");
- int_lock->lock_op_ret = 0;
- int_lock->lock_cbk (frame, this);
- }
- /* Not all locks were successful. Unlock and try locking
- again, this time with serially blocking locks */
- else {
- gf_log (this->name, GF_LOG_TRACE,
- "%d servers locked. Trying again with blocking calls",
- int_lock->lock_count);
-
- afr_unlock(frame, this);
- }
- }
-
- return 0;
-}
-
-int
-afr_nonblocking_entrylk (call_frame_t *frame, xlator_t *this)
+afr_nb_internal_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- afr_fd_ctx_t *fd_ctx = NULL;
- int copies = 0;
- int index = 0;
- int lockee_no = 0;
- int32_t call_count = 0;
- int i = 0;
-
- local = frame->local;
- int_lock = &local->internal_lock;
- priv = this->private;
-
- copies = priv->child_count;
- initialize_entrylk_variables (frame, this);
-
- if (local->fd) {
- fd_ctx = afr_fd_ctx_get (local->fd, this);
- if (!fd_ctx) {
- gf_log (this->name, GF_LOG_INFO,
- "unable to get fd ctx for fd=%p",
- local->fd);
-
- local->op_ret = -1;
- int_lock->lock_op_ret = -1;
- local->op_errno = EINVAL;
- int_lock->lock_op_errno = EINVAL;
-
- afr_unlock (frame, this);
- return -1;
- }
-
- call_count = int_lock->lockee_count * internal_lock_count (frame, this);
- int_lock->lk_call_count = call_count;
- int_lock->lk_expected_count = call_count;
-
- if (!call_count) {
- gf_log (this->name, GF_LOG_INFO,
- "fd not open on any subvolumes. aborting.");
- afr_unlock (frame, this);
- goto out;
- }
-
- /* Send non-blocking entrylk calls only on up children
- and where the fd has been opened */
- for (i = 0; i < int_lock->lockee_count*priv->child_count; i++) {
- index = i%copies;
- lockee_no = i/copies;
- if (local->child_up[index]) {
- AFR_TRACE_ENTRYLK_IN (frame, this, AFR_ENTRYLK_NB_TRANSACTION,
- AFR_LOCK_OP,
- int_lock->lockee[lockee_no].basename,
- i);
-
- STACK_WIND_COOKIE (frame, afr_nonblocking_entrylk_cbk,
- (void *) (long) i,
- priv->children[index],
- priv->children[index]->fops->fentrylk,
- this->name, local->fd,
- int_lock->lockee[lockee_no].basename,
- ENTRYLK_LOCK_NB, ENTRYLK_WRLCK,
- NULL);
- if (!--call_count)
- break;
- }
- }
- } else {
- call_count = int_lock->lockee_count * internal_lock_count (frame, this);
- int_lock->lk_call_count = call_count;
- int_lock->lk_expected_count = call_count;
-
- for (i = 0; i < int_lock->lockee_count*priv->child_count; i++) {
- index = i%copies;
- lockee_no = i/copies;
- if (local->child_up[index]) {
- AFR_TRACE_ENTRYLK_IN (frame, this, AFR_ENTRYLK_NB_TRANSACTION,
- AFR_LOCK_OP,
- int_lock->lockee[lockee_no].basename,
- i);
-
- STACK_WIND_COOKIE (frame, afr_nonblocking_entrylk_cbk,
- (void *) (long) i,
- priv->children[index],
- priv->children[index]->fops->entrylk,
- this->name, &int_lock->lockee[lockee_no].loc,
- int_lock->lockee[lockee_no].basename,
- ENTRYLK_LOCK_NB, ENTRYLK_WRLCK,
- NULL);
-
- if (!--call_count)
- break;
- }
- }
- }
-out:
- return 0;
-}
-
-int32_t
-afr_nonblocking_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_inodelk_t *inodelk = NULL;
- afr_local_t *local = NULL;
- int call_count = 0;
- int child_index = (long) cookie;
- afr_fd_ctx_t *fd_ctx = NULL;
-
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ int call_count = 0;
+ int child_index = 0;
+ int lockee_num = 0;
+ afr_private_t *priv = NULL;
- local = frame->local;
- int_lock = &local->internal_lock;
- inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+ priv = this->private;
- AFR_TRACE_INODELK_OUT (frame, this, AFR_INODELK_NB_TRANSACTION,
- AFR_LOCK_OP, NULL, op_ret,
- op_errno, (long) cookie);
+ child_index = ((long)cookie) % priv->child_count;
+ lockee_num = ((long)cookie) / priv->child_count;
- if (local->fd)
- fd_ctx = afr_fd_ctx_get (local->fd, this);
+ local = frame->local;
+ int_lock = &local->internal_lock;
- LOCK (&frame->lock);
+ if (op_ret == 0 && local->transaction.type == AFR_DATA_TRANSACTION) {
+ LOCK(&local->inode->lock);
{
- if (op_ret < 0) {
- if (op_errno == ENOSYS) {
- /* return ENOTSUP */
- gf_log (this->name, GF_LOG_ERROR,
- "subvolume does not support locking. "
- "please load features/locks xlator on "
- "server");
- local->op_ret = op_ret;
- int_lock->lock_op_ret = op_ret;
- int_lock->lock_op_errno = op_errno;
- local->op_errno = op_errno;
- }
- if (local->transaction.eager_lock)
- local->transaction.eager_lock[child_index] = 0;
- } else {
- inodelk->locked_nodes[child_index] |= LOCKED_YES;
- inodelk->lock_count++;
-
- if (local->transaction.eager_lock &&
- local->transaction.eager_lock[child_index] &&
- local->fd) {
- /* piggybacked */
- if (op_ret == 1) {
- /* piggybacked */
- } else if (op_ret == 0) {
- /* lock acquired from server */
- fd_ctx->lock_acquired[child_index]++;
- }
- }
- }
-
- call_count = --int_lock->lk_call_count;
- }
- UNLOCK (&frame->lock);
-
- if (call_count == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "Last inode locking reply received");
- /* all locks successful. Proceed to call FOP */
- if (inodelk->lock_count == int_lock->lk_expected_count) {
- gf_log (this->name, GF_LOG_TRACE,
- "All servers locked. Calling the cbk");
- int_lock->lock_op_ret = 0;
- int_lock->lock_cbk (frame, this);
- }
- /* Not all locks were successful. Unlock and try locking
- again, this time with serially blocking locks */
- else {
- gf_log (this->name, GF_LOG_TRACE,
- "%d servers locked. Trying again with blocking calls",
- int_lock->lock_count);
-
- afr_unlock(frame, this);
- }
+ local->inode_ctx->lock_count++;
}
+ UNLOCK(&local->inode->lock);
+ }
- return 0;
+ LOCK(&frame->lock);
+ {
+ if (op_ret < 0) {
+ if (op_errno == ENOSYS) {
+ /* return ENOTSUP */
+ gf_msg(this->name, GF_LOG_ERROR, ENOSYS,
+ AFR_MSG_LOCK_XLATOR_NOT_LOADED,
+ "subvolume does not support "
+ "locking. please load features/locks"
+ " xlator on server");
+ local->op_ret = op_ret;
+ int_lock->lock_op_ret = op_ret;
+
+ int_lock->lock_op_errno = op_errno;
+ local->op_errno = op_errno;
+ }
+ } else if (op_ret == 0) {
+ int_lock->lockee[lockee_num]
+ .locked_nodes[child_index] |= LOCKED_YES;
+ int_lock->lockee[lockee_num].locked_count++;
+ int_lock->lock_count++;
+ }
+
+ call_count = --int_lock->lk_call_count;
+ }
+ UNLOCK(&frame->lock);
+
+ if (call_count == 0) {
+ gf_msg_trace(this->name, 0, "Last locking reply received");
+ /* all locks successful. Proceed to call FOP */
+ if (int_lock->lock_count == int_lock->lk_expected_count) {
+ gf_msg_trace(this->name, 0, "All servers locked. Calling the cbk");
+ int_lock->lock_op_ret = 0;
+ int_lock->lock_cbk(frame, this);
+ }
+ /* Not all locks were successful. Unlock and try locking
+ again, this time with serially blocking locks */
+ else {
+ gf_msg_trace(this->name, 0,
+ "%d servers locked. Trying again "
+ "with blocking calls",
+ int_lock->lock_count);
+
+ afr_unlock_now(frame, this);
+ }
+ }
+
+ return 0;
}
int
-afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_inodelk_t *inodelk = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- afr_fd_ctx_t *fd_ctx = NULL;
- int32_t call_count = 0;
- int i = 0;
- int ret = 0;
- struct gf_flock flock = {0,};
- struct gf_flock full_flock = {0,};
- struct gf_flock *flock_use = NULL;
- int piggyback = 0;
-
- local = frame->local;
- int_lock = &local->internal_lock;
- priv = this->private;
-
- inodelk = afr_get_inodelk (int_lock, int_lock->domain);
-
- flock.l_start = inodelk->flock.l_start;
- flock.l_len = inodelk->flock.l_len;
- flock.l_type = inodelk->flock.l_type;
-
- full_flock.l_type = inodelk->flock.l_type;
-
- initialize_inodelk_variables (frame, this);
-
- if (local->fd) {
- fd_ctx = afr_fd_ctx_get (local->fd, this);
- if (!fd_ctx) {
- gf_log (this->name, GF_LOG_INFO,
- "unable to get fd ctx for fd=%p",
- local->fd);
-
- local->op_ret = -1;
- int_lock->lock_op_ret = -1;
- local->op_errno = EINVAL;
- int_lock->lock_op_errno = EINVAL;
-
- afr_unlock (frame, this);
- ret = -1;
- goto out;
- }
-
- call_count = internal_lock_count (frame, this);
- int_lock->lk_call_count = call_count;
- int_lock->lk_expected_count = call_count;
-
- if (!call_count) {
- gf_log (this->name, GF_LOG_INFO,
- "fd not open on any subvolumes. aborting.");
- afr_unlock (frame, this);
- goto out;
- }
-
- /* Send non-blocking inodelk calls only on up children
- and where the fd has been opened */
- for (i = 0; i < priv->child_count; i++) {
- if (!local->child_up[i])
- continue;
-
- flock_use = &flock;
- if (!local->transaction.eager_lock_on) {
- goto wind;
- }
-
- piggyback = 0;
- local->transaction.eager_lock[i] = 1;
-
- afr_set_delayed_post_op (frame, this);
-
- LOCK (&local->fd->lock);
- {
- if (fd_ctx->lock_acquired[i]) {
- fd_ctx->lock_piggyback[i]++;
- piggyback = 1;
- }
- }
- UNLOCK (&local->fd->lock);
-
- if (piggyback) {
- /* (op_ret == 1) => indicate piggybacked lock */
- afr_nonblocking_inodelk_cbk (frame, (void *) (long) i,
- this, 1, 0, NULL);
- if (!--call_count)
- break;
- continue;
- }
- flock_use = &full_flock;
- wind:
- AFR_TRACE_INODELK_IN (frame, this,
- AFR_INODELK_NB_TRANSACTION,
- AFR_LOCK_OP, flock_use, F_SETLK, i);
-
- STACK_WIND_COOKIE (frame, afr_nonblocking_inodelk_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->finodelk,
- int_lock->domain, local->fd,
- F_SETLK, flock_use, NULL);
-
- if (!--call_count)
- break;
- }
- } else {
- call_count = internal_lock_count (frame, this);
- int_lock->lk_call_count = call_count;
- int_lock->lk_expected_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (!local->child_up[i])
- continue;
- AFR_TRACE_INODELK_IN (frame, this,
- AFR_INODELK_NB_TRANSACTION,
- AFR_LOCK_OP, &flock, F_SETLK, i);
-
- STACK_WIND_COOKIE (frame, afr_nonblocking_inodelk_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->inodelk,
- int_lock->domain, &local->loc,
- F_SETLK, &flock, NULL);
-
- if (!--call_count)
- break;
- }
+afr_lock_nonblocking(call_frame_t *frame, xlator_t *this)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int child = 0;
+ int lockee_num = 0;
+ int32_t call_count = 0;
+ int i = 0;
+ int ret = 0;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+ priv = this->private;
+
+ initialize_internal_lock_variables(frame, this);
+
+ if (local->fd) {
+ fd_ctx = afr_fd_ctx_get(local->fd, this);
+ if (!fd_ctx) {
+ gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_FD_CTX_GET_FAILED,
+ "unable to get fd ctx for fd=%p", local->fd);
+
+ local->op_ret = -1;
+ int_lock->lock_op_ret = -1;
+ local->op_errno = EINVAL;
+ int_lock->lock_op_errno = EINVAL;
+
+ afr_unlock_now(frame, this);
+ ret = -1;
+ goto out;
+ }
+ }
+
+ call_count = int_lock->lockee_count * internal_lock_count(frame, this);
+ int_lock->lk_call_count = call_count;
+ int_lock->lk_expected_count = call_count;
+
+ if (!call_count) {
+ gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_INFO_COMMON,
+ "fd not open on any subvolumes. aborting.");
+ afr_unlock_now(frame, this);
+ goto out;
+ }
+
+ /* Send non-blocking lock calls only on up children
+ and where the fd has been opened */
+ for (i = 0; i < int_lock->lockee_count * priv->child_count; i++) {
+ child = i % priv->child_count;
+ lockee_num = i / priv->child_count;
+ if (local->child_up[child]) {
+ afr_internal_lock_wind(frame, afr_nb_internal_lock_cbk,
+ (void *)(long)i, child, lockee_num,
+ _gf_false, _gf_false);
+ if (!--call_count)
+ break;
}
+ }
out:
- return ret;
+ return ret;
}
int32_t
-afr_unlock (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- if (transaction_lk_op (local)) {
- if (is_afr_lock_transaction (local))
- afr_unlock_inodelk (frame, this);
- else
- afr_unlock_entrylk (frame, this);
-
- } else {
- if (is_afr_lock_selfheal (local))
- afr_unlock_inodelk (frame, this);
- else
- afr_unlock_entrylk (frame, this);
- }
-
+afr_unlock(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_lock_t *lock = NULL;
+
+ local = frame->local;
+
+ if (!local->transaction.eager_lock_on)
+ goto out;
+ lock = &local->inode_ctx->lock[local->transaction.type];
+ LOCK(&local->inode->lock);
+ {
+ list_del_init(&local->transaction.owner_list);
+ if (list_empty(&lock->owners) && list_empty(&lock->post_op)) {
+ local->transaction.do_eager_unlock = _gf_true;
+ /*TODO: Need to get metadata use on_disk and inherit/uninherit
+ *GF_ASSERT (!local->inode_ctx->on_disk[local->transaction.type]);
+ *GF_ASSERT (!local->inode_ctx->inherited[local->transaction.type]);
+ */
+ GF_ASSERT(lock->release);
+ }
+ }
+ UNLOCK(&local->inode->lock);
+ if (!local->transaction.do_eager_unlock) {
+ local->internal_lock.lock_cbk(frame, this);
return 0;
-}
-
-int
-afr_lk_transfer_datalock (call_frame_t *dst, call_frame_t *src, char *dom,
- unsigned int child_count)
-{
- afr_local_t *dst_local = NULL;
- afr_local_t *src_local = NULL;
- afr_internal_lock_t *dst_lock = NULL;
- afr_internal_lock_t *src_lock = NULL;
- afr_inodelk_t *dst_inodelk = NULL;
- afr_inodelk_t *src_inodelk = NULL;
- int ret = -1;
-
- src_local = src->local;
- src_lock = &src_local->internal_lock;
- src_inodelk = afr_get_inodelk (src_lock, dom);
- dst_local = dst->local;
- dst_lock = &dst_local->internal_lock;
- dst_inodelk = afr_get_inodelk (dst_lock, dom);
- if (!dst_inodelk || !src_inodelk)
- goto out;
- if (src_inodelk->locked_nodes) {
- memcpy (dst_inodelk->locked_nodes, src_inodelk->locked_nodes,
- sizeof (*dst_inodelk->locked_nodes) * child_count);
- memset (src_inodelk->locked_nodes, 0,
- sizeof (*src_inodelk->locked_nodes) * child_count);
- }
+ }
- dst_lock->transaction_lk_type = src_lock->transaction_lk_type;
- dst_lock->selfheal_lk_type = src_lock->selfheal_lk_type;
- dst_inodelk->lock_count = src_inodelk->lock_count;
- src_inodelk->lock_count = 0;
- ret = 0;
out:
- return ret;
+ afr_unlock_now(frame, this);
+ return 0;
}
diff --git a/xlators/cluster/afr/src/afr-mem-types.h b/xlators/cluster/afr/src/afr-mem-types.h
index 05df90cc0ee..816065fb57a 100644
--- a/xlators/cluster/afr/src/afr-mem-types.h
+++ b/xlators/cluster/afr/src/afr-mem-types.h
@@ -8,42 +8,31 @@
cases as published by the Free Software Foundation.
*/
-
#ifndef __AFR_MEM_TYPES_H__
#define __AFR_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_afr_mem_types_ {
- gf_afr_mt_iovec = gf_common_mt_end + 1,
- gf_afr_mt_afr_fd_ctx_t,
- gf_afr_mt_afr_private_t,
- gf_afr_mt_int32_t,
- gf_afr_mt_char,
- gf_afr_mt_xattr_key,
- gf_afr_mt_dict_t,
- gf_afr_mt_xlator_t,
- gf_afr_mt_iatt,
- gf_afr_mt_int,
- gf_afr_mt_afr_node_character,
- gf_afr_mt_sh_diff_loop_state,
- gf_afr_mt_uint8_t,
- gf_afr_mt_loc_t,
- gf_afr_mt_entry_name,
- gf_afr_mt_pump_priv,
- gf_afr_mt_locked_fd,
- gf_afr_mt_inode_ctx_t,
- gf_afr_fd_paused_call_t,
- gf_afr_mt_crawl_data_t,
- gf_afr_mt_brick_pos_t,
- gf_afr_mt_shd_bool_t,
- gf_afr_mt_shd_timer_t,
- gf_afr_mt_shd_event_t,
- gf_afr_mt_time_t,
- gf_afr_mt_pos_data_t,
- gf_afr_mt_reply_t,
- gf_afr_mt_subvol_healer_t,
- gf_afr_mt_end
+ gf_afr_mt_afr_fd_ctx_t = gf_common_mt_end + 1,
+ gf_afr_mt_afr_private_t,
+ gf_afr_mt_int32_t,
+ gf_afr_mt_char,
+ gf_afr_mt_xattr_key,
+ gf_afr_mt_dict_t,
+ gf_afr_mt_xlator_t,
+ gf_afr_mt_afr_node_character,
+ gf_afr_mt_inode_ctx_t,
+ gf_afr_mt_shd_event_t,
+ gf_afr_mt_reply_t,
+ gf_afr_mt_subvol_healer_t,
+ gf_afr_mt_spbc_timeout_t,
+ gf_afr_mt_spb_status_t,
+ gf_afr_mt_empty_brick_t,
+ gf_afr_mt_child_latency_t,
+ gf_afr_mt_atomic_t,
+ gf_afr_mt_lk_heal_info_t,
+ gf_afr_mt_gf_lock,
+ gf_afr_mt_end
};
#endif
-
diff --git a/xlators/cluster/afr/src/afr-messages.h b/xlators/cluster/afr/src/afr-messages.h
index 52bdead934b..e73fd997765 100644
--- a/xlators/cluster/afr/src/afr-messages.h
+++ b/xlators/cluster/afr/src/afr-messages.h
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
This file is licensed to you under your choice of the GNU Lesser
@@ -11,137 +11,157 @@
#ifndef _AFR_MESSAGES_H_
#define _AFR_MESSAGES_H_
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+#include <glusterfs/glfs-message-id.h>
-#include "glfs-message-id.h"
-
-/*! \file afr-messages.h
- * \brief AFR log-message IDs and their descriptions.
- */
-
-/* NOTE: Rules for message additions
- * 1) Each instance of a message is _better_ left with a unique message ID, even
- * if the message format is the same. Reasoning is that, if the message
- * format needs to change in one instance, the other instances are not
- * impacted or the new change does not change the ID of the instance being
- * modified.
- * 2) Addition of a message,
- * - Should increment the GLFS_NUM_MESSAGES
- * - Append to the list of messages defined, towards the end
- * - Retain macro naming as glfs_msg_X (for redability across developers)
- * NOTE: Rules for message format modifications
- * 3) Check acorss the code if the message ID macro in question is reused
- * anywhere. If reused then then the modifications should ensure correctness
- * everywhere, or needs a new message ID as (1) above was not adhered to. If
- * not used anywhere, proceed with the required modification.
- * NOTE: Rules for message deletion
- * 4) Check (3) and if used anywhere else, then cannot be deleted. If not used
- * anywhere, then can be deleted, but will leave a hole by design, as
- * addition rules specify modification to the end of the list and not filling
- * holes.
- */
-
-#define GLFS_COMP_BASE_AFR GLFS_MSGID_COMP_AFR
-#define GLFS_NUM_MESSAGES 10
-#define GLFS_MSGID_END (GLFS_COMP_BASE_AFR + GLFS_NUM_MESSAGES + 1)
-
-#define glfs_msg_start_x GLFS_COMP_BASE_AFR, "Invalid: Start of messages"
-
-/*!
- * @messageid 108001
- * @diagnosis Client quorum is not met due to which file modification
- * operations are disallowed.
- * @recommendedaction Some brick processes are down/ not visible from the
- * client. Ensure that the bricks are up/ network traffic is not blocked.
- */
-#define AFR_MSG_QUORUM_FAIL (GLFS_COMP_BASE_AFR + 1)
-
-
-/*!
- * @messageid 108002
- * @diagnosis The bricks that were down are now up and quorum is restored.
- * @recommendedaction Possibly check why the bricks went down to begin with.
- */
-#define AFR_MSG_QUORUM_MET (GLFS_COMP_BASE_AFR + 2)
-
-
-/*!
- * @messageid 108003
- * @diagnosis Client quorum-type was set to auto due to which the quorum-count
- * option is no longer valid.
- * @recommendedaction None.
- */
-#define AFR_MSG_QUORUM_OVERRIDE (GLFS_COMP_BASE_AFR + 3)
-
-
-/*!
- * @messageid 108004
- * @diagnosis Replication sub volume witnessed a connection notification
- * from a brick which does not belong to its replica set.
- * @recommendedaction None. This is a safety check in code.
- */
-#define AFR_MSG_INVALID_CHILD_UP (GLFS_COMP_BASE_AFR + 4)
-
-
-/*!
- * @messageid 108005
- * @diagnosis A replica set that was inaccessible because all its bricks were
- * down is now accessible because at least one of its bricks came back up.
- * @recommendedaction Possibly check why all the bricks of that replica set
- * went down to begin with.
- */
-#define AFR_MSG_SUBVOL_UP (GLFS_COMP_BASE_AFR + 5)
-
-
-/*!
- * @messageid 108006
- * @diagnosis All bricks of a replica set are down. Data residing in that
- * replica cannot be accessed until one of the bricks come back up.
- * @recommendedaction Ensure that the bricks are up.
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
*/
-#define AFR_MSG_ALL_SUBVOLS_DOWN (GLFS_COMP_BASE_AFR + 6)
-
-
-/*!
- * @messageid 108007
- * @diagnosis Entry unlocks failed on a brick.
- * @recommendedaction Error number in the log should give the reason why it
- * failed. Also observe brick logs for more information.
-*/
-#define AFR_MSG_ENTRY_UNLOCK_FAIL (GLFS_COMP_BASE_AFR + 7)
-
-
-/*!
- * @messageid 108008
- * @diagnosis There is an inconsistency in the file's data/metadata/gfid
- * amongst the bricks of a replica set.
- * @recommendedaction Resolve the split brain by clearing the AFR changelog
- * attributes from the appropriate brick and trigger self-heal.
- */
-#define AFR_MSG_SPLIT_BRAIN (GLFS_COMP_BASE_AFR + 8)
-
-
-/*!
- * @messageid 108009
- * @diagnosis open/opendir failed on a brick.
- * @recommendedaction Error number in the log should give the reason why it
- * failed. Also observe brick logs for more information.
- */
-#define AFR_MSG_OPEN_FAIL (GLFS_COMP_BASE_AFR + 9)
-
-
-/*!
- * @messageid 108010
- * @diagnosis Inode unlocks failed on a brick.
- * @recommendedaction Error number in the log should give the reason why it
- * failed. Also observe brick logs for more information.
-*/
-#define AFR_MSG_INODE_UNLOCK_FAIL (GLFS_COMP_BASE_AFR + 10)
-
-
-#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
+GLFS_MSGID(
+ AFR, AFR_MSG_QUORUM_FAIL, AFR_MSG_QUORUM_MET, AFR_MSG_QUORUM_OVERRIDE,
+ AFR_MSG_INVALID_CHILD_UP, AFR_MSG_SUBVOL_UP, AFR_MSG_SUBVOLS_DOWN,
+ AFR_MSG_ENTRY_UNLOCK_FAIL, AFR_MSG_SPLIT_BRAIN, AFR_MSG_OPEN_FAIL,
+ AFR_MSG_UNLOCK_FAIL, AFR_MSG_REPLACE_BRICK_STATUS, AFR_MSG_GFID_NULL,
+ AFR_MSG_FD_CREATE_FAILED, AFR_MSG_DICT_SET_FAILED,
+ AFR_MSG_EXPUNGING_FILE_OR_DIR, AFR_MSG_MIGRATION_IN_PROGRESS,
+ AFR_MSG_CHILD_MISCONFIGURED, AFR_MSG_VOL_MISCONFIGURED,
+ AFR_MSG_INTERNAL_LKS_FAILED, AFR_MSG_INVALID_FD, AFR_MSG_LOCK_INFO,
+ AFR_MSG_LOCK_XLATOR_NOT_LOADED, AFR_MSG_FD_CTX_GET_FAILED,
+ AFR_MSG_INVALID_SUBVOL, AFR_MSG_PUMP_XLATOR_ERROR, AFR_MSG_SELF_HEAL_INFO,
+ AFR_MSG_READ_SUBVOL_ERROR, AFR_MSG_DICT_GET_FAILED, AFR_MSG_INFO_COMMON,
+ AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR, AFR_MSG_LOCAL_CHILD, AFR_MSG_INVALID_DATA,
+ AFR_MSG_INVALID_ARG, AFR_MSG_INDEX_DIR_GET_FAILED, AFR_MSG_FSYNC_FAILED,
+ AFR_MSG_FAVORITE_CHILD, AFR_MSG_SELF_HEAL_FAILED,
+ AFR_MSG_SPLIT_BRAIN_STATUS, AFR_MSG_ADD_BRICK_STATUS, AFR_MSG_NO_CHANGELOG,
+ AFR_MSG_TIMER_CREATE_FAIL, AFR_MSG_SBRAIN_FAV_CHILD_POLICY,
+ AFR_MSG_INODE_CTX_GET_FAILED, AFR_MSG_THIN_ARB,
+ AFR_MSG_THIN_ARB_XATTROP_FAILED, AFR_MSG_THIN_ARB_LOC_POP_FAILED,
+ AFR_MSG_GET_PEND_VAL, AFR_MSG_THIN_ARB_SKIP_SHD, AFR_MSG_UNKNOWN_SET,
+ AFR_MSG_NO_XL_ID, AFR_MSG_SELF_HEAL_INFO_START,
+ AFR_MSG_SELF_HEAL_INFO_FINISH, AFR_MSG_INCRE_COUNT,
+ AFR_MSG_ADD_TO_OUTPUT_FAILED, AFR_MSG_SET_TIME_FAILED,
+ AFR_MSG_GFID_MISMATCH_DETECTED, AFR_MSG_GFID_HEAL_MSG,
+ AFR_MSG_THIN_ARB_LOOKUP_FAILED, AFR_MSG_DICT_CREATE_FAILED,
+ AFR_MSG_NO_MAJORITY_TO_RESOLVE, AFR_MSG_TYPE_MISMATCH,
+ AFR_MSG_SIZE_POLICY_NOT_APPLICABLE, AFR_MSG_NO_CHILD_SELECTED,
+ AFR_MSG_INVALID_CHILD, AFR_MSG_RESOLVE_CONFLICTING_DATA,
+ SERROR_GETTING_SRC_BRICK, SNO_DIFF_IN_MTIME, SNO_BIGGER_FILE,
+ SALL_BRICKS_UP_TO_RESOLVE, AFR_MSG_UNLOCK_FAILED, AFR_MSG_POST_OP_FAILED,
+ AFR_MSG_TA_FRAME_CREATE_FAILED, AFR_MSG_SET_KEY_XATTROP_FAILED,
+ AFR_MSG_BLOCKING_ENTRYLKS_FAILED, AFR_MSG_FOP_FAILED,
+ AFR_MSG_CLEAN_UP_FAILED, AFR_MSG_UNABLE_TO_FETCH, AFR_MSG_XATTR_SET_FAILED,
+ AFR_MSG_SPLIT_BRAIN_REPLICA, AFR_MSG_INODE_CTX_FAILED,
+ AFR_MSG_LOOKUP_FAILED, AFR_MSG_ALL_SUBVOLS_DOWN,
+ AFR_MSG_RELEASE_LOCK_FAILED, AFR_MSG_CLEAR_TIME_SPLIT_BRAIN,
+ AFR_MSG_READ_FAILED, AFR_MSG_LAUNCH_FAILED, AFR_MSG_READ_SUBVOL_NOT_UP,
+ AFR_MSG_LK_HEAL_DOM, AFR_MSG_NEW_BRICK, AFR_MSG_SPLIT_BRAIN_SET_FAILED,
+ AFR_MSG_SPLIT_BRAIN_DETERMINE_FAILED, AFR_MSG_HEALER_SPAWN_FAILED,
+ AFR_MSG_ADD_CRAWL_EVENT_FAILED, AFR_MSG_NULL_DEREF, AFR_MSG_SET_PEND_XATTR,
+ AFR_MSG_INTERNAL_ATTR);
+
+#define AFR_MSG_DICT_GET_FAILED_STR "Dict get failed"
+#define AFR_MSG_DICT_SET_FAILED_STR "Dict set failed"
+#define AFR_MSG_HEALER_SPAWN_FAILED_STR "Healer spawn failed"
+#define AFR_MSG_ADD_CRAWL_EVENT_FAILED_STR "Adding crawl event failed"
+#define AFR_MSG_INVALID_ARG_STR "Invalid argument"
+#define AFR_MSG_INDEX_DIR_GET_FAILED_STR "unable to get index-dir on "
+#define AFR_MSG_THIN_ARB_LOOKUP_FAILED_STR "Failed lookup on file"
+#define AFR_MSG_DICT_CREATE_FAILED_STR "Failed to create dict."
+#define AFR_MSG_THIN_ARB_XATTROP_FAILED_STR "Xattrop failed."
+#define AFR_MSG_THIN_ARB_LOC_POP_FAILED_STR \
+ "Failed to populate loc for thin-arbiter"
+#define AFR_MSG_GET_PEND_VAL_STR "Error getting value of pending"
+#define AFR_MSG_THIN_ARB_SKIP_SHD_STR "I am not the god shd. skipping."
+#define AFR_MSG_UNKNOWN_SET_STR "Unknown set"
+#define AFR_MSG_NO_XL_ID_STR "xl does not have id"
+#define AFR_MSG_SELF_HEAL_INFO_START_STR "starting full sweep on"
+#define AFR_MSG_SELF_HEAL_INFO_FINISH_STR "finished full sweep on"
+#define AFR_MSG_INCRE_COUNT_STR "Could not increment the counter."
+#define AFR_MSG_ADD_TO_OUTPUT_FAILED_STR "Could not add to output"
+#define AFR_MSG_SET_TIME_FAILED_STR "Could not set time"
+#define AFR_MSG_GFID_HEAL_MSG_STR "Error setting gfid-heal-msg dict"
+#define AFR_MSG_NO_MAJORITY_TO_RESOLVE_STR \
+ "No majority to resolve gfid split brain"
+#define AFR_MSG_GFID_MISMATCH_DETECTED_STR "Gfid mismatch dectected"
+#define AFR_MSG_SELF_HEAL_INFO_STR "performing selfheal"
+#define AFR_MSG_TYPE_MISMATCH_STR "TYPE mismatch"
+#define AFR_MSG_SIZE_POLICY_NOT_APPLICABLE_STR \
+ "Size policy is not applicable to directories."
+#define AFR_MSG_NO_CHILD_SELECTED_STR \
+ "No child selected by favorite-child policy"
+#define AFR_MSG_INVALID_CHILD_STR "Invalid child"
+#define AFR_MSG_RESOLVE_CONFLICTING_DATA_STR \
+ "selected as authentic to resolve conflicting data"
+#define SERROR_GETTING_SRC_BRICK_STR "Error getting the source brick"
+#define SNO_DIFF_IN_MTIME_STR "No difference in mtime"
+#define SNO_BIGGER_FILE_STR "No bigger file"
+#define SALL_BRICKS_UP_TO_RESOLVE_STR \
+ "All the bricks should be up to resolve the gfid split brain"
+#define AFR_MSG_UNLOCK_FAILED_STR "Failed to unlock"
+#define AFR_MSG_POST_OP_FAILED_STR "Post-op on thin-arbiter failed"
+#define AFR_MSG_TA_FRAME_CREATE_FAILED_STR "Failed to create ta_frame"
+#define AFR_MSG_SET_KEY_XATTROP_FAILED_STR "Could not set key during xattrop"
+#define AFR_MSG_BLOCKING_ENTRYLKS_FAILED_STR "Blocking entrylks failed"
+#define AFR_MSG_FSYNC_FAILED_STR "fsync failed"
+#define AFR_MSG_QUORUM_FAIL_STR "quorum is not met"
+#define AFR_MSG_FOP_FAILED_STR "Failing Fop"
+#define AFR_MSG_INVALID_SUBVOL_STR "not a subvolume"
+#define AFR_MSG_VOL_MISCONFIGURED_STR "Volume is dangling"
+#define AFR_MSG_CHILD_MISCONFIGURED_STR \
+ "replicate translator needs more than one subvolume defined"
+#define AFR_MSG_CLEAN_UP_FAILED_STR "Failed to clean up healer threads"
+#define AFR_MSG_QUORUM_OVERRIDE_STR "overriding quorum-count"
+#define AFR_MSG_UNABLE_TO_FETCH_STR \
+ "Unable to fetch afr-pending-xattr option from volfile. Falling back to " \
+ "using client translator names"
+#define AFR_MSG_NULL_DEREF_STR "possible NULL deref"
+#define AFR_MSG_XATTR_SET_FAILED_STR "Cannot set xattr cookie key"
+#define AFR_MSG_SPLIT_BRAIN_STATUS_STR "Failed to create synctask"
+#define AFR_MSG_SUBVOLS_DOWN_STR "All subvolumes are not up"
+#define AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR_STR \
+ "Failed to cancel split-brain choice"
+#define AFR_MSG_SPLIT_BRAIN_REPLICA_STR \
+ "Cannot set replica. File is not in data/metadata split-brain"
+#define AFR_MSG_INODE_CTX_FAILED_STR "Failed to get inode_ctx"
+#define AFR_MSG_READ_SUBVOL_ERROR_STR "no read subvols"
+#define AFR_MSG_LOCAL_CHILD_STR "selecting local read-child"
+#define AFR_MSG_LOOKUP_FAILED_STR "Failed to lookup/create thin-arbiter id file"
+#define AFR_MSG_TIMER_CREATE_FAIL_STR \
+ "Cannot create timer for delayed initialization"
+#define AFR_MSG_SUBVOL_UP_STR "Subvolume came back up; going online"
+#define AFR_MSG_ALL_SUBVOLS_DOWN_STR \
+ "All subvolumes are down. Going offline until atleast one of them is up"
+#define AFR_MSG_RELEASE_LOCK_FAILED_STR "Failed to release lock"
+#define AFR_MSG_INVALID_CHILD_UP_STR "Received child_up from invalid subvolume"
+#define AFR_MSG_QUORUM_MET_STR "Client-quorum is met"
+#define AFR_MSG_EXPUNGING_FILE_OR_DIR_STR "expunging file or dir"
+#define AFR_MSG_SELF_HEAL_FAILED_STR "Invalid"
+#define AFR_MSG_SPLIT_BRAIN_STR "Skipping conservative mergeon the file"
+#define AFR_MSG_CLEAR_TIME_SPLIT_BRAIN_STR "clear time split brain"
+#define AFR_MSG_READ_FAILED_STR "Failing read since good brick is down"
+#define AFR_MSG_LAUNCH_FAILED_STR "Failed to launch synctask"
+#define AFR_MSG_READ_SUBVOL_NOT_UP_STR \
+ "read subvolume in this generation is not up"
+#define AFR_MSG_INTERNAL_LKS_FAILED_STR \
+ "Unable to work with lk-owner while attempting fop"
+#define AFR_MSG_LOCK_XLATOR_NOT_LOADED_STR \
+ "subvolume does not support locking. please load features/locks xlator " \
+ "on server."
+#define AFR_MSG_FD_CTX_GET_FAILED_STR "unable to get fd ctx"
+#define AFR_MSG_INFO_COMMON_STR "fd not open on any subvolumes, aborting."
+#define AFR_MSG_REPLACE_BRICK_STATUS_STR "Couldn't acquire lock on any child."
+#define AFR_MSG_NEW_BRICK_STR "New brick"
+#define AFR_MSG_SPLIT_BRAIN_SET_FAILED_STR \
+ "Failed to set split-brain choice to -1"
+#define AFR_MSG_SPLIT_BRAIN_DETERMINE_FAILED_STR \
+ "Failed to determine split-brain. Aborting split-brain-choice set"
+#define AFR_MSG_OPEN_FAIL_STR "Failed to open subvolume"
+#define AFR_MSG_SET_PEND_XATTR_STR "Set of pending xattr"
+#define AFR_MSG_INTERNAL_ATTR_STR "is an internal extended attribute"
#endif /* !_AFR_MESSAGES_H_ */
diff --git a/xlators/cluster/afr/src/afr-open.c b/xlators/cluster/afr/src/afr-open.c
index f1da8b05201..64856042b65 100644
--- a/xlators/cluster/afr/src/afr-open.c
+++ b/xlators/cluster/afr/src/afr-open.c
@@ -8,327 +8,346 @@
cases as published by the Free Software Foundation.
*/
-#include <libgen.h>
#include <unistd.h>
-#include <fnmatch.h>
#include <sys/time.h>
#include <stdlib.h>
#include <signal.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
+#include <glusterfs/glusterfs.h>
#include "afr.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "list.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
-#include "byte-order.h"
-#include "statedump.h"
-
-#include "fd.h"
-
-#include "afr-inode-read.h"
-#include "afr-inode-write.h"
-#include "afr-dir-read.h"
-#include "afr-dir-write.h"
-#include "afr-transaction.h"
+#include <glusterfs/dict.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/statedump.h>
+#include "afr-transaction.h"
gf_boolean_t
-afr_is_fd_fixable (fd_t *fd)
+afr_is_fd_fixable(fd_t *fd)
{
- if (!fd || !fd->inode)
- return _gf_false;
- else if (fd_is_anonymous (fd))
- return _gf_false;
- else if (gf_uuid_is_null (fd->inode->gfid))
- return _gf_false;
-
- return _gf_true;
+ if (!fd || !fd->inode)
+ return _gf_false;
+ else if (fd_is_anonymous(fd))
+ return _gf_false;
+ else if (gf_uuid_is_null(fd->inode->gfid))
+ return _gf_false;
+
+ return _gf_true;
}
-
int
-afr_open_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+afr_open_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- afr_local_t * local = frame->local;
+ afr_local_t *local = frame->local;
- AFR_STACK_UNWIND (open, frame, local->op_ret, local->op_errno,
- local->fd, xdata);
- return 0;
+ AFR_STACK_UNWIND(open, frame, local->op_ret, local->op_errno,
+ local->cont.open.fd, xdata);
+ return 0;
}
-
int
-afr_open_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- fd_t *fd, dict_t *xdata)
+afr_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- afr_local_t * local = NULL;
- int call_count = -1;
- int child_index = (long) cookie;
- afr_fd_ctx_t *fd_ctx = NULL;
-
- local = frame->local;
- fd_ctx = local->fd_ctx;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- fd_ctx->opened_on[child_index] = AFR_FD_NOT_OPENED;
- } else {
- local->op_ret = op_ret;
- fd_ctx->opened_on[child_index] = AFR_FD_OPENED;
- if (!local->xdata_rsp && xdata)
- local->xdata_rsp = dict_ref (xdata);
- }
+ afr_local_t *local = NULL;
+ int call_count = -1;
+ int child_index = (long)cookie;
+ afr_fd_ctx_t *fd_ctx = NULL;
+
+ local = frame->local;
+ fd_ctx = local->fd_ctx;
+
+ local->replies[child_index].valid = 1;
+ local->replies[child_index].op_ret = op_ret;
+ local->replies[child_index].op_errno = op_errno;
+
+ LOCK(&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ fd_ctx->opened_on[child_index] = AFR_FD_NOT_OPENED;
+ } else {
+ local->op_ret = op_ret;
+ fd_ctx->opened_on[child_index] = AFR_FD_OPENED;
+ if (!local->xdata_rsp && xdata)
+ local->xdata_rsp = dict_ref(xdata);
}
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- if ((fd_ctx->flags & O_TRUNC) && (local->op_ret >= 0)) {
- STACK_WIND (frame, afr_open_ftruncate_cbk,
- this, this->fops->ftruncate,
- fd, 0, NULL);
- } else {
- AFR_STACK_UNWIND (open, frame, local->op_ret,
- local->op_errno, local->fd,
- local->xdata_rsp);
- }
+ call_count = --local->call_count;
+ }
+ UNLOCK(&frame->lock);
+
+ if (call_count == 0) {
+ afr_handle_replies_quorum(frame, this);
+ if (local->op_ret == -1) {
+ AFR_STACK_UNWIND(open, frame, local->op_ret, local->op_errno, NULL,
+ NULL);
+ } else if (fd_ctx->flags & O_TRUNC) {
+ STACK_WIND(frame, afr_open_ftruncate_cbk, this,
+ this->fops->ftruncate, fd, 0, NULL);
+ } else {
+ AFR_STACK_UNWIND(open, frame, local->op_ret, local->op_errno,
+ local->cont.open.fd, local->xdata_rsp);
}
+ }
- return 0;
+ return 0;
}
int
-afr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, dict_t *xdata)
+afr_open_continue(call_frame_t *frame, xlator_t *this, int err)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- int i = 0;
- int32_t call_count = 0;
- int32_t op_errno = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
-
- //We can't let truncation to happen outside transaction.
-
- priv = this->private;
-
- local = AFR_FRAME_INIT (frame, op_errno);
- if (!local)
- goto out;
-
- fd_ctx = afr_fd_ctx_get (fd, this);
- if (!fd_ctx) {
- op_errno = ENOMEM;
- goto out;
- }
-
- local->fd = fd_ref (fd);
- local->fd_ctx = fd_ctx;
- fd_ctx->flags = flags;
-
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = 0;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ if (err) {
+ AFR_STACK_UNWIND(open, frame, -1, err, NULL, NULL);
+ } else {
+ local->call_count = AFR_COUNT(local->child_up, priv->child_count);
call_count = local->call_count;
- local->cont.open.flags = flags;
-
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_open_cbk, (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->open,
- loc, (flags & ~O_TRUNC), fd, xdata);
- if (!--call_count)
- break;
- }
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE(frame, afr_open_cbk, (void *)(long)i,
+ priv->children[i],
+ priv->children[i]->fops->open, &local->loc,
+ (local->cont.open.flags & ~O_TRUNC),
+ local->cont.open.fd, local->xdata_req);
+ if (!--call_count)
+ break;
+ }
}
+ }
+ return 0;
+}
- return 0;
+int
+afr_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int spb_subvol = 0;
+ int event_generation = 0;
+ int ret = 0;
+ int32_t op_errno = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
+
+ // We can't let truncation to happen outside transaction.
+
+ priv = this->private;
+
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
+
+ local->op = GF_FOP_OPEN;
+ fd_ctx = afr_fd_ctx_get(fd, this);
+ if (!fd_ctx) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ if (priv->quorum_count && !afr_has_quorum(local->child_up, this, NULL)) {
+ op_errno = afr_quorum_errno(priv);
+ goto out;
+ }
+
+ if (!afr_is_consistent_io_possible(local, priv, &op_errno))
+ goto out;
+
+ local->inode = inode_ref(loc->inode);
+ loc_copy(&local->loc, loc);
+ local->fd_ctx = fd_ctx;
+ fd_ctx->flags = flags;
+ if (xdata)
+ local->xdata_req = dict_ref(xdata);
+
+ local->cont.open.flags = flags;
+ local->cont.open.fd = fd_ref(fd);
+
+ ret = afr_inode_get_readable(frame, local->inode, this, NULL,
+ &event_generation, AFR_DATA_TRANSACTION);
+ if ((ret < 0) &&
+ (afr_split_brain_read_subvol_get(local->inode, this, NULL,
+ &spb_subvol) == 0) &&
+ spb_subvol < 0) {
+ afr_inode_refresh(frame, this, local->inode, local->inode->gfid,
+ afr_open_continue);
+ } else {
+ afr_open_continue(frame, this, 0);
+ }
+
+ return 0;
out:
- AFR_STACK_UNWIND (open, frame, -1, op_errno, fd, NULL);
+ AFR_STACK_UNWIND(open, frame, -1, op_errno, fd, NULL);
- return 0;
+ return 0;
}
int
-afr_openfd_fix_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd,
- dict_t *xdata)
+afr_openfd_fix_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd,
+ dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- afr_fd_ctx_t *fd_ctx = NULL;
- int call_count = 0;
- int child_index = (long) cookie;
-
- priv = this->private;
- local = frame->local;
-
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int call_count = 0;
+ int child_index = (long)cookie;
+
+ priv = this->private;
+ local = frame->local;
+
+ if (op_ret >= 0) {
+ gf_msg_debug(this->name, 0,
+ "fd for %s opened "
+ "successfully on subvolume %s",
+ local->loc.path, priv->children[child_index]->name);
+ } else {
+ gf_smsg(this->name, fop_log_level(GF_FOP_OPEN, op_errno), op_errno,
+ AFR_MSG_OPEN_FAIL, "path=%s", local->loc.path, "subvolume=%s",
+ priv->children[child_index]->name, NULL);
+ }
+
+ fd_ctx = local->fd_ctx;
+
+ LOCK(&local->fd->lock);
+ {
if (op_ret >= 0) {
- gf_log (this->name, GF_LOG_DEBUG, "fd for %s opened "
- "successfully on subvolume %s", local->loc.path,
- priv->children[child_index]->name);
+ fd_ctx->opened_on[child_index] = AFR_FD_OPENED;
} else {
- gf_msg (this->name, fop_log_level (GF_FOP_OPEN, op_errno),
- op_errno, AFR_MSG_OPEN_FAIL, "Failed to open %s on "
- "subvolume %s", local->loc.path,
- priv->children[child_index]->name);
+ fd_ctx->opened_on[child_index] = AFR_FD_NOT_OPENED;
}
+ }
+ UNLOCK(&local->fd->lock);
- fd_ctx = local->fd_ctx;
+ call_count = afr_frame_return(frame);
+ if (call_count == 0)
+ AFR_STACK_DESTROY(frame);
- LOCK (&local->fd->lock);
- {
- if (op_ret >= 0) {
- fd_ctx->opened_on[child_index] = AFR_FD_OPENED;
- } else {
- fd_ctx->opened_on[child_index] = AFR_FD_NOT_OPENED;
- }
- }
- UNLOCK (&local->fd->lock);
+ return 0;
+}
- call_count = afr_frame_return (frame);
- if (call_count == 0)
- AFR_STACK_DESTROY (frame);
+static int
+afr_fd_ctx_need_open(fd_t *fd, xlator_t *this, unsigned char *need_open)
+{
+ afr_fd_ctx_t *fd_ctx = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int count = 0;
+
+ priv = this->private;
+ fd_ctx = afr_fd_ctx_get(fd, this);
+ if (!fd_ctx)
return 0;
-}
+ LOCK(&fd->lock);
+ {
+ for (i = 0; i < priv->child_count; i++) {
+ if (fd_ctx->opened_on[i] == AFR_FD_NOT_OPENED &&
+ priv->child_up[i]) {
+ fd_ctx->opened_on[i] = AFR_FD_OPENING;
+ need_open[i] = 1;
+ count++;
+ } else {
+ need_open[i] = 0;
+ }
+ }
+ }
+ UNLOCK(&fd->lock);
-static int
-afr_fd_ctx_need_open (fd_t *fd, xlator_t *this, unsigned char *need_open)
-{
- afr_fd_ctx_t *fd_ctx = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- int count = 0;
-
- priv = this->private;
-
- fd_ctx = afr_fd_ctx_get (fd, this);
- if (!fd_ctx)
- return 0;
-
- LOCK (&fd->lock);
- {
- for (i = 0; i < priv->child_count; i++) {
- if (fd_ctx->opened_on[i] == AFR_FD_NOT_OPENED &&
- priv->child_up[i]) {
- fd_ctx->opened_on[i] = AFR_FD_OPENING;
- need_open[i] = 1;
- count++;
- } else {
- need_open[i] = 0;
- }
- }
- }
- UNLOCK (&fd->lock);
-
- return count;
+ return count;
}
-
void
-afr_fix_open (fd_t *fd, xlator_t *this)
+afr_fix_open(fd_t *fd, xlator_t *this)
{
- afr_private_t *priv = NULL;
- int i = 0;
- call_frame_t *frame = NULL;
- afr_local_t *local = NULL;
- int ret = -1;
- int32_t op_errno = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
- unsigned char *need_open = NULL;
- int call_count = 0;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
+ int ret = -1;
+ int32_t op_errno = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ unsigned char *need_open = NULL;
+ int call_count = 0;
- priv = this->private;
+ priv = this->private;
- if (!afr_is_fd_fixable (fd))
- goto out;
+ if (!afr_is_fd_fixable(fd))
+ goto out;
- fd_ctx = afr_fd_ctx_get (fd, this);
- if (!fd_ctx)
- goto out;
+ fd_ctx = afr_fd_ctx_get(fd, this);
+ if (!fd_ctx)
+ goto out;
- need_open = alloca0 (priv->child_count);
+ need_open = alloca0(priv->child_count);
- call_count = afr_fd_ctx_need_open (fd, this, need_open);
- if (!call_count)
- goto out;
+ call_count = afr_fd_ctx_need_open(fd, this, need_open);
+ if (!call_count)
+ goto out;
- frame = create_frame (this, this->ctx->pool);
- if (!frame)
- goto out;
+ frame = create_frame(this, this->ctx->pool);
+ if (!frame)
+ goto out;
- local = AFR_FRAME_INIT (frame, op_errno);
- if (!local)
- goto out;
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
- local->loc.inode = inode_ref (fd->inode);
- ret = loc_path (&local->loc, NULL);
- if (ret < 0)
- goto out;
+ local->loc.inode = inode_ref(fd->inode);
+ ret = loc_path(&local->loc, NULL);
+ if (ret < 0)
+ goto out;
- local->fd = fd_ref (fd);
- local->fd_ctx = fd_ctx;
+ local->fd = fd_ref(fd);
+ local->fd_ctx = fd_ctx;
- local->call_count = call_count;
+ local->call_count = call_count;
- gf_log (this->name, GF_LOG_DEBUG, "need open count: %d",
- call_count);
+ gf_msg_debug(this->name, 0, "need open count: %d", call_count);
- for (i = 0; i < priv->child_count; i++) {
- if (!need_open[i])
- continue;
-
- if (IA_IFDIR == fd->inode->ia_type) {
- gf_log (this->name, GF_LOG_DEBUG,
- "opening fd for dir %s on subvolume %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_openfd_fix_open_cbk,
- (void*) (long) i,
- priv->children[i],
- priv->children[i]->fops->opendir,
- &local->loc, local->fd,
- NULL);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "opening fd for file %s on subvolume %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_openfd_fix_open_cbk,
- (void *)(long) i,
- priv->children[i],
- priv->children[i]->fops->open,
- &local->loc,
- fd_ctx->flags & (~O_TRUNC),
- local->fd, NULL);
- }
-
- if (!--call_count)
- break;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!need_open[i])
+ continue;
+
+ if (IA_IFDIR == fd->inode->ia_type) {
+ gf_msg_debug(this->name, 0, "opening fd for dir %s on subvolume %s",
+ local->loc.path, priv->children[i]->name);
+
+ STACK_WIND_COOKIE(frame, afr_openfd_fix_open_cbk, (void *)(long)i,
+ priv->children[i],
+ priv->children[i]->fops->opendir, &local->loc,
+ local->fd, NULL);
+ } else {
+ gf_msg_debug(this->name, 0,
+ "opening fd for file %s on subvolume %s",
+ local->loc.path, priv->children[i]->name);
+
+ STACK_WIND_COOKIE(frame, afr_openfd_fix_open_cbk, (void *)(long)i,
+ priv->children[i], priv->children[i]->fops->open,
+ &local->loc, fd_ctx->flags & (~O_TRUNC),
+ local->fd, NULL);
}
- return;
+ if (!--call_count)
+ break;
+ }
+
+ return;
out:
- if (frame)
- AFR_STACK_DESTROY (frame);
+ if (frame)
+ AFR_STACK_DESTROY(frame);
}
diff --git a/xlators/cluster/afr/src/afr-read-txn.c b/xlators/cluster/afr/src/afr-read-txn.c
index eaa73d9be20..6fc2c75145c 100644
--- a/xlators/cluster/afr/src/afr-read-txn.c
+++ b/xlators/cluster/afr/src/afr-read-txn.c
@@ -10,123 +10,329 @@
#include "afr.h"
#include "afr-transaction.h"
+#include "afr-messages.h"
-int
-afr_read_txn_next_subvol (call_frame_t *frame, xlator_t *this)
+void
+afr_pending_read_increment(afr_private_t *priv, int child_index)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- int subvol = -1;
-
- local = frame->local;
- priv = this->private;
-
-
- for (i = 0; i < priv->child_count; i++) {
- if (!local->readable[i]) {
- /* don't even bother trying here.
- just mark as attempted and move on. */
- local->read_attempted[i] = 1;
- continue;
- }
-
- if (!local->read_attempted[i]) {
- subvol = i;
- break;
- }
- }
-
- /* If no more subvols were available for reading, we leave
- @subvol as -1, which is an indication we have run out of
- readable subvols. */
- if (subvol != -1)
- local->read_attempted[subvol] = 1;
- local->readfn (frame, this, subvol);
-
- return 0;
+ if (child_index < 0 || child_index > priv->child_count)
+ return;
+
+ GF_ATOMIC_INC(priv->pending_reads[child_index]);
}
+void
+afr_pending_read_decrement(afr_private_t *priv, int child_index)
+{
+ if (child_index < 0 || child_index > priv->child_count)
+ return;
+
+ GF_ATOMIC_DEC(priv->pending_reads[child_index]);
+}
+
+void
+afr_read_txn_wind(call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ afr_pending_read_decrement(priv, local->read_subvol);
+ local->read_subvol = subvol;
+ afr_pending_read_increment(priv, subvol);
+ local->readfn(frame, this, subvol);
+}
int
-afr_read_txn_refresh_done (call_frame_t *frame, xlator_t *this, int err)
+afr_read_txn_next_subvol(call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- int read_subvol = 0;
- int event_generation = 0;
- inode_t *inode = NULL;
- int ret = -1;
- int spb_choice = -1;
-
- local = frame->local;
- inode = local->inode;
-
- if (err) {
- local->op_errno = -err;
- local->op_ret = -1;
- read_subvol = -1;
- goto readfn;
- }
-
- ret = afr_inode_read_subvol_type_get (inode, this, local->readable,
- &event_generation,
- local->transaction.type);
-
- if (ret == -1 || !event_generation) {
- /* Even after refresh, we don't have a good
- read subvolume. Time to bail */
- local->op_ret = -1;
- local->op_errno = EIO;
- read_subvol = -1;
- goto readfn;
- }
-
- read_subvol = afr_read_subvol_select_by_policy (inode, this,
- local->readable);
-
- if (read_subvol == -1) {
- local->op_ret = -1;
- local->op_errno = EIO;
- goto readfn;
- }
-
- if (local->read_attempted[read_subvol]) {
- afr_read_txn_next_subvol (frame, this);
- return 0;
- }
-
- local->read_attempted[read_subvol] = 1;
-readfn:
- if (read_subvol == -1) {
- ret = afr_inode_split_brain_choice_get (inode, this,
- &spb_choice);
- if ((ret == 0) && spb_choice >= 0)
- read_subvol = spb_choice;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int subvol = -1;
+
+ local = frame->local;
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->readable[i]) {
+ /* don't even bother trying here.
+ just mark as attempted and move on. */
+ local->read_attempted[i] = 1;
+ continue;
}
- local->readfn (frame, this, read_subvol);
- return 0;
+ if (!local->read_attempted[i]) {
+ subvol = i;
+ break;
+ }
+ }
+
+ /* If no more subvols were available for reading, we leave
+ @subvol as -1, which is an indication we have run out of
+ readable subvols. */
+ if (subvol != -1)
+ local->read_attempted[subvol] = 1;
+ afr_read_txn_wind(frame, this, subvol);
+
+ return 0;
}
+static int
+afr_ta_read_txn_done(int ret, call_frame_t *ta_frame, void *opaque)
+{
+ STACK_DESTROY(ta_frame->root);
+ return 0;
+}
+
+static int
+afr_ta_read_txn(void *opaque)
+{
+ call_frame_t *frame = NULL;
+ xlator_t *this = NULL;
+ int read_subvol = -1;
+ int query_child = AFR_CHILD_UNKNOWN;
+ int possible_bad_child = AFR_CHILD_UNKNOWN;
+ int ret = 0;
+ int op_errno = ENOMEM;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ struct gf_flock flock = {
+ 0,
+ };
+ dict_t *xdata_req = NULL;
+ dict_t *xdata_rsp = NULL;
+ int **pending = NULL;
+ loc_t loc = {
+ 0,
+ };
+
+ frame = (call_frame_t *)opaque;
+ this = frame->this;
+ local = frame->local;
+ priv = this->private;
+ query_child = local->read_txn_query_child;
+
+ if (query_child == AFR_CHILD_ZERO) {
+ possible_bad_child = AFR_CHILD_ONE;
+ } else if (query_child == AFR_CHILD_ONE) {
+ possible_bad_child = AFR_CHILD_ZERO;
+ } else {
+ /*read_txn_query_child is AFR_CHILD_UNKNOWN*/
+ goto out;
+ }
+
+ /* Ask the query_child to see if it blames the possibly bad one. */
+ xdata_req = dict_new();
+ if (!xdata_req)
+ goto out;
+
+ pending = afr_matrix_create(priv->child_count, AFR_NUM_CHANGE_LOGS);
+ if (!pending)
+ goto out;
+
+ ret = afr_set_pending_dict(priv, xdata_req, pending);
+ if (ret < 0)
+ goto out;
+
+ if (local->fd) {
+ ret = syncop_fxattrop(priv->children[query_child], local->fd,
+ GF_XATTROP_ADD_ARRAY, xdata_req, NULL, &xdata_rsp,
+ NULL);
+ } else {
+ ret = syncop_xattrop(priv->children[query_child], &local->loc,
+ GF_XATTROP_ADD_ARRAY, xdata_req, NULL, &xdata_rsp,
+ NULL);
+ }
+ if (ret || !xdata_rsp) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Failed xattrop for gfid %s on %s",
+ uuid_utoa(local->inode->gfid),
+ priv->children[query_child]->name);
+ op_errno = -ret;
+ goto out;
+ }
+
+ if (afr_ta_dict_contains_pending_xattr(xdata_rsp, priv,
+ possible_bad_child)) {
+ read_subvol = query_child;
+ goto out;
+ }
+ dict_unref(xdata_rsp);
+ xdata_rsp = NULL;
+
+ /* It doesn't. So query thin-arbiter to see if it blames any data brick. */
+ ret = afr_fill_ta_loc(this, &loc, _gf_true);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Failed to populate thin-arbiter loc for: %s.", loc.name);
+ goto out;
+ }
+ flock.l_type = F_WRLCK; /*start and length are already zero. */
+ ret = syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX],
+ AFR_TA_DOM_MODIFY, &loc, F_SETLKW, &flock, NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "gfid:%s: Failed to get AFR_TA_DOM_MODIFY lock on %s.",
+ uuid_utoa(local->inode->gfid),
+ priv->pending_key[THIN_ARBITER_BRICK_INDEX]);
+ op_errno = -ret;
+ goto out;
+ }
+
+ ret = syncop_xattrop(priv->children[THIN_ARBITER_BRICK_INDEX], &loc,
+ GF_XATTROP_ADD_ARRAY, xdata_req, NULL, &xdata_rsp,
+ NULL);
+ if (ret || !xdata_rsp) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "gfid:%s: Failed xattrop on %s.", uuid_utoa(local->inode->gfid),
+ priv->pending_key[THIN_ARBITER_BRICK_INDEX]);
+ op_errno = -ret;
+ goto unlock;
+ }
+
+ if (!afr_ta_dict_contains_pending_xattr(xdata_rsp, priv, query_child)) {
+ read_subvol = query_child;
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, EIO, AFR_MSG_THIN_ARB,
+ "Failing read for gfid %s since good brick %s is down",
+ uuid_utoa(local->inode->gfid),
+ priv->children[possible_bad_child]->name);
+ op_errno = EIO;
+ }
+
+unlock:
+ flock.l_type = F_UNLCK;
+ ret = syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX],
+ AFR_TA_DOM_MODIFY, &loc, F_SETLK, &flock, NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "gfid:%s: Failed to unlock AFR_TA_DOM_MODIFY lock on "
+ "%s.",
+ uuid_utoa(local->inode->gfid),
+ priv->pending_key[THIN_ARBITER_BRICK_INDEX]);
+ }
+out:
+ if (xdata_req)
+ dict_unref(xdata_req);
+ if (xdata_rsp)
+ dict_unref(xdata_rsp);
+ if (pending)
+ afr_matrix_cleanup(pending, priv->child_count);
+ loc_wipe(&loc);
+
+ if (read_subvol == -1) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ }
+ afr_read_txn_wind(frame, this, read_subvol);
+ return ret;
+}
+
+void
+afr_ta_read_txn_synctask(call_frame_t *frame, xlator_t *this)
+{
+ call_frame_t *ta_frame = NULL;
+ afr_local_t *local = NULL;
+ int ret = 0;
+
+ local = frame->local;
+ ta_frame = afr_ta_frame_create(this);
+ if (!ta_frame) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_THIN_ARB,
+ "Failed to create ta_frame");
+ goto out;
+ }
+ ret = synctask_new(this->ctx->env, afr_ta_read_txn, afr_ta_read_txn_done,
+ ta_frame, frame);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_THIN_ARB,
+ "Failed to launch "
+ "afr_ta_read_txn synctask for gfid %s.",
+ uuid_utoa(local->inode->gfid));
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ STACK_DESTROY(ta_frame->root);
+ goto out;
+ }
+ return;
+out:
+ afr_read_txn_wind(frame, this, -1);
+}
int
-afr_read_txn_continue (call_frame_t *frame, xlator_t *this, int subvol)
+afr_read_txn_refresh_done(call_frame_t *frame, xlator_t *this, int err)
{
- afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int read_subvol = -1;
+ inode_t *inode = NULL;
+ int ret = -1;
+ int spb_subvol = -1;
+
+ local = frame->local;
+ inode = local->inode;
+ priv = this->private;
+
+ if (err) {
+ if (!priv->thin_arbiter_count)
+ goto readfn;
+ if (err != EINVAL)
+ goto readfn;
+ /* We need to query the good bricks and/or thin-arbiter.*/
+ afr_ta_read_txn_synctask(frame, this);
+ return 0;
+ }
+
+ read_subvol = afr_read_subvol_select_by_policy(inode, this, local->readable,
+ NULL);
+ if (read_subvol == -1) {
+ err = EIO;
+ goto readfn;
+ }
+
+ if (local->read_attempted[read_subvol]) {
+ afr_read_txn_next_subvol(frame, this);
+ return 0;
+ }
+
+ local->read_attempted[read_subvol] = 1;
+readfn:
+ if (read_subvol == -1) {
+ ret = afr_split_brain_read_subvol_get(inode, this, frame, &spb_subvol);
+ if ((ret == 0) && spb_subvol >= 0)
+ read_subvol = spb_subvol;
+ }
+
+ if (read_subvol == -1) {
+ AFR_SET_ERROR_AND_CHECK_SPLIT_BRAIN(-1, err);
+ }
+ afr_read_txn_wind(frame, this, read_subvol);
+
+ return 0;
+}
- local = frame->local;
+int
+afr_read_txn_continue(call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
- if (!local->refreshed) {
- local->refreshed = _gf_true;
- afr_inode_refresh (frame, this, local->inode,
- afr_read_txn_refresh_done);
- } else {
- afr_read_txn_next_subvol (frame, this);
- }
+ local = frame->local;
- return 0;
-}
+ if (!local->refreshed) {
+ local->refreshed = _gf_true;
+ afr_inode_refresh(frame, this, local->inode, NULL,
+ afr_read_txn_refresh_done);
+ } else {
+ afr_read_txn_next_subvol(frame, this);
+ }
+ return 0;
+}
/* afr_read_txn_wipe:
@@ -136,27 +342,26 @@ afr_read_txn_continue (call_frame_t *frame, xlator_t *this, int subvol)
*/
void
-afr_read_txn_wipe (call_frame_t *frame, xlator_t *this)
+afr_read_txn_wipe(call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
- local = frame->local;
- priv = this->private;
+ local = frame->local;
+ priv = this->private;
- local->readfn = NULL;
+ local->readfn = NULL;
- if (local->inode)
- inode_unref (local->inode);
+ if (local->inode)
+ inode_unref(local->inode);
- for (i = 0; i < priv->child_count; i++) {
- local->read_attempted[i] = 0;
- local->readable[i] = 0;
- }
+ for (i = 0; i < priv->child_count; i++) {
+ local->read_attempted[i] = 0;
+ local->readable[i] = 0;
+ }
}
-
/*
afr_read_txn:
@@ -185,76 +390,105 @@ afr_read_txn_wipe (call_frame_t *frame, xlator_t *this)
*/
int
-afr_read_txn (call_frame_t *frame, xlator_t *this, inode_t *inode,
- afr_read_txn_wind_t readfn, afr_transaction_type type)
+afr_read_txn(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ afr_read_txn_wind_t readfn, afr_transaction_type type)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int read_subvol = -1;
- int event_generation = 0;
- int ret = -1;
-
- priv = this->private;
- local = frame->local;
-
- afr_read_txn_wipe (frame, this);
-
- local->readfn = readfn;
- local->inode = inode_ref (inode);
-
- if (priv->quorum_reads &&
- priv->quorum_count && !afr_has_quorum (priv->child_up, this)) {
- local->op_ret = -1;
- local->op_errno = ENOTCONN;
- read_subvol = -1;
- goto read;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ unsigned char *data = NULL;
+ unsigned char *metadata = NULL;
+ int read_subvol = -1;
+ int event_generation = 0;
+ int ret = -1;
+
+ priv = this->private;
+ local = frame->local;
+ data = alloca0(priv->child_count);
+ metadata = alloca0(priv->child_count);
+
+ afr_read_txn_wipe(frame, this);
+
+ local->readfn = readfn;
+ local->inode = inode_ref(inode);
+ local->is_read_txn = _gf_true;
+ local->transaction.type = type;
+
+ if (priv->quorum_count && !afr_has_quorum(local->child_up, this, NULL)) {
+ local->op_ret = -1;
+ local->op_errno = afr_quorum_errno(priv);
+ goto read;
+ }
+
+ if (!afr_is_consistent_io_possible(local, priv, &local->op_errno)) {
+ local->op_ret = -1;
+ goto read;
+ }
+
+ if (priv->thin_arbiter_count && !afr_ta_has_quorum(priv, local)) {
+ local->op_ret = -1;
+ local->op_errno = -afr_quorum_errno(priv);
+ goto read;
+ }
+
+ if (priv->thin_arbiter_count &&
+ AFR_COUNT(local->child_up, priv->child_count) != priv->child_count) {
+ if (local->child_up[0]) {
+ local->read_txn_query_child = AFR_CHILD_ZERO;
+ } else if (local->child_up[1]) {
+ local->read_txn_query_child = AFR_CHILD_ONE;
}
-
-
- local->transaction.type = type;
- ret = afr_inode_read_subvol_type_get (inode, this, local->readable,
- &event_generation, type);
- if (ret == -1)
- /* very first transaction on this inode */
- goto refresh;
-
- gf_log (this->name, GF_LOG_DEBUG, "%s: generation now vs cached: %d, "
- "%d", uuid_utoa (inode->gfid), local->event_generation,
- event_generation);
- if (local->event_generation != event_generation)
- /* servers have disconnected / reconnected, and possibly
- rebooted, very likely changing the state of freshness
- of copies */
- goto refresh;
-
- read_subvol = afr_read_subvol_select_by_policy (inode, this,
- local->readable);
-
- if (read_subvol < 0 || read_subvol > priv->child_count) {
- gf_msg (this->name, GF_LOG_WARNING, 0, AFR_MSG_SPLIT_BRAIN,
- "Unreadable subvolume %d found with event generation "
- "%d. (Possible split-brain)",
- read_subvol, event_generation);
- goto refresh;
- }
-
- if (!local->child_up[read_subvol]) {
- /* should never happen, just in case */
- gf_log (this->name, GF_LOG_WARNING, "subvolume %d is the "
- "read subvolume in this generation, but is not up",
- read_subvol);
- goto refresh;
- }
-
- local->read_attempted[read_subvol] = 1;
+ afr_ta_read_txn_synctask(frame, this);
+ return 0;
+ }
+
+ ret = afr_inode_read_subvol_get(inode, this, data, metadata,
+ &event_generation);
+ if (ret == -1)
+ /* very first transaction on this inode */
+ goto refresh;
+ AFR_INTERSECT(local->readable, data, metadata, priv->child_count);
+
+ gf_msg_debug(this->name, 0,
+ "%s: generation now vs cached: %d, "
+ "%d",
+ uuid_utoa(inode->gfid), local->event_generation,
+ event_generation);
+ if (afr_is_inode_refresh_reqd(inode, this, local->event_generation,
+ event_generation))
+ /* servers have disconnected / reconnected, and possibly
+ rebooted, very likely changing the state of freshness
+ of copies */
+ goto refresh;
+
+ read_subvol = afr_read_subvol_select_by_policy(inode, this, local->readable,
+ NULL);
+
+ if (read_subvol < 0 || read_subvol > priv->child_count) {
+ gf_msg_debug(this->name, 0,
+ "Unreadable subvolume %d found "
+ "with event generation %d for gfid %s.",
+ read_subvol, event_generation, uuid_utoa(inode->gfid));
+ goto refresh;
+ }
+
+ if (!local->child_up[read_subvol]) {
+ /* should never happen, just in case */
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_READ_SUBVOL_ERROR,
+ "subvolume %d is the "
+ "read subvolume in this generation, but is not up",
+ read_subvol);
+ goto refresh;
+ }
+
+ local->read_attempted[read_subvol] = 1;
read:
- local->readfn (frame, this, read_subvol);
+ afr_read_txn_wind(frame, this, read_subvol);
- return 0;
+ return 0;
refresh:
- afr_inode_refresh (frame, this, inode, afr_read_txn_refresh_done);
+ afr_inode_refresh(frame, this, inode, NULL, afr_read_txn_refresh_done);
- return 0;
+ return 0;
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index f3d1f8b0b14..a580a1584cc 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -8,283 +8,808 @@
cases as published by the Free Software Foundation.
*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "afr.h"
#include "afr-self-heal.h"
-#include "byte-order.h"
+#include <glusterfs/byte-order.h>
#include "protocol-common.h"
+#include "afr-messages.h"
+#include <glusterfs/events.h>
+
+void
+afr_heal_synctask(xlator_t *this, afr_local_t *local);
int
-afr_selfheal_post_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
+afr_lookup_and_heal_gfid(xlator_t *this, inode_t *parent, const char *name,
+ inode_t *inode, struct afr_reply *replies, int source,
+ unsigned char *sources, void *gfid, int *gfid_idx)
{
- afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
+ unsigned char *wind_on = NULL;
+ ia_type_t ia_type = IA_INVAL;
+ dict_t *xdata = NULL;
+ loc_t loc = {
+ 0,
+ };
+ int ret = 0;
+ int i = 0;
+
+ priv = this->private;
+ wind_on = alloca0(priv->child_count);
+ if (source >= 0 && replies[source].valid && replies[source].op_ret == 0)
+ ia_type = replies[source].poststat.ia_type;
+
+ if (ia_type != IA_INVAL)
+ goto heal;
+
+ /* If ia_type is still invalid, it means either
+ * (a)'source' was -1, i.e. parent dir pending xattrs are in split-brain
+ * (or) (b) The parent dir pending xattrs are all zeroes (i.e. all bricks
+ * are sources) and the 'source' we selected earlier might be the one where
+ * the file is not actually present.
+ *
+ * In both cases, let us pick a brick with a successful reply and use its
+ * ia_type.
+ * */
+ for (i = 0; i < priv->child_count; i++) {
+ if (source == -1) {
+ /* case (a) above. */
+ if (replies[i].valid && replies[i].op_ret == 0 &&
+ replies[i].poststat.ia_type != IA_INVAL) {
+ ia_type = replies[i].poststat.ia_type;
+ break;
+ }
+ } else {
+ /* case (b) above. */
+ if (i == source)
+ continue;
+ if (sources[i] && replies[i].valid && replies[i].op_ret == 0 &&
+ replies[i].poststat.ia_type != IA_INVAL) {
+ ia_type = replies[i].poststat.ia_type;
+ break;
+ }
+ }
+ }
+
+heal:
+ /* gfid heal on those subvolumes that do not have gfid associated
+ * with the inode and update those replies.
+ */
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret != 0)
+ continue;
+
+ if (gf_uuid_is_null(gfid) &&
+ !gf_uuid_is_null(replies[i].poststat.ia_gfid) &&
+ replies[i].poststat.ia_type == ia_type)
+ gfid = replies[i].poststat.ia_gfid;
+
+ if (!gf_uuid_is_null(replies[i].poststat.ia_gfid) ||
+ replies[i].poststat.ia_type != ia_type)
+ continue;
+
+ wind_on[i] = 1;
+ }
+
+ if (AFR_COUNT(wind_on, priv->child_count) == 0)
+ return 0;
+
+ xdata = dict_new();
+ if (!xdata) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = dict_set_gfuuid(xdata, "gfid-req", gfid, true);
+ if (ret) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ frame = afr_frame_create(this, &ret);
+ if (!frame) {
+ ret = -ret;
+ goto out;
+ }
+
+ local = frame->local;
+ loc.parent = inode_ref(parent);
+ gf_uuid_copy(loc.pargfid, parent->gfid);
+ loc.name = name;
+ loc.inode = inode_ref(inode);
+
+ AFR_ONLIST(wind_on, frame, afr_selfheal_discover_cbk, lookup, &loc, xdata);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!wind_on[i])
+ continue;
+ afr_reply_wipe(&replies[i]);
+ afr_reply_copy(&replies[i], &local->replies[i]);
+ }
+ if (gfid_idx && (*gfid_idx == -1)) {
+ /*Pick a brick where the gifd heal was successful.*/
+ for (i = 0; i < priv->child_count; i++) {
+ if (!wind_on[i])
+ continue;
+ if (replies[i].valid && replies[i].op_ret == 0 &&
+ !gf_uuid_is_null(replies[i].poststat.ia_gfid)) {
+ *gfid_idx = i;
+ break;
+ }
+ }
+ }
+out:
+ if (gfid_idx && (*gfid_idx == -1) && (ret == 0) && local) {
+ ret = -afr_final_errno(local, priv);
+ }
+ loc_wipe(&loc);
+ if (frame)
+ AFR_STACK_DESTROY(frame);
+ if (xdata)
+ dict_unref(xdata);
+
+ return ret;
+}
- local = frame->local;
+int
+afr_gfid_sbrain_source_from_src_brick(xlator_t *this, struct afr_reply *replies,
+ char *src_brick)
+{
+ int i = 0;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret == -1)
+ continue;
+ if (strcmp(priv->children[i]->name, src_brick) == 0)
+ return i;
+ }
+ return -1;
+}
- syncbarrier_wake (&local->barrier);
+int
+afr_selfheal_gfid_mismatch_by_majority(struct afr_reply *replies,
+ int child_count)
+{
+ int j = 0;
+ int i = 0;
+ int votes;
+
+ for (i = 0; i < child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret == -1)
+ continue;
+
+ votes = 1;
+ for (j = i + 1; j < child_count; j++) {
+ if ((!gf_uuid_compare(replies[i].poststat.ia_gfid,
+ replies[j].poststat.ia_gfid)))
+ votes++;
+ if (votes > child_count / 2)
+ return i;
+ }
+ }
- return 0;
+ return -1;
}
+int
+afr_gfid_sbrain_source_from_bigger_file(struct afr_reply *replies,
+ int child_count)
+{
+ int i = 0;
+ int src = -1;
+ uint64_t size = 0;
+
+ for (i = 0; i < child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret == -1)
+ continue;
+ if (size < replies[i].poststat.ia_size) {
+ src = i;
+ size = replies[i].poststat.ia_size;
+ } else if (replies[i].poststat.ia_size == size) {
+ src = -1;
+ }
+ }
+ return src;
+}
int
-afr_selfheal_post_op (call_frame_t *frame, xlator_t *this, inode_t *inode,
- int subvol, dict_t *xattr)
+afr_gfid_sbrain_source_from_latest_mtime(struct afr_reply *replies,
+ int child_count)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- loc_t loc = {0, };
+ int i = 0;
+ int src = -1;
+ uint32_t mtime = 0;
+ uint32_t mtime_nsec = 0;
+
+ for (i = 0; i < child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret != 0)
+ continue;
+ if ((mtime < replies[i].poststat.ia_mtime) ||
+ ((mtime == replies[i].poststat.ia_mtime) &&
+ (mtime_nsec < replies[i].poststat.ia_mtime_nsec))) {
+ src = i;
+ mtime = replies[i].poststat.ia_mtime;
+ mtime_nsec = replies[i].poststat.ia_mtime_nsec;
+ } else if ((mtime == replies[i].poststat.ia_mtime) &&
+ (mtime_nsec == replies[i].poststat.ia_mtime_nsec)) {
+ src = -1;
+ }
+ }
+ return src;
+}
- priv = this->private;
- local = frame->local;
+int
+afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies,
+ inode_t *inode, uuid_t pargfid, const char *bname,
+ int src_idx, int child_idx,
+ unsigned char *locked_on, int *src, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ char g1[64] = {
+ 0,
+ };
+ char g2[64] = {
+ 0,
+ };
+ int up_count = 0;
+ int heal_op = -1;
+ int ret = -1;
+ char *src_brick = NULL;
+
+ *src = -1;
+ priv = this->private;
+ up_count = AFR_COUNT(locked_on, priv->child_count);
+ if (up_count != priv->child_count) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN,
+ "All the bricks should be up to resolve the gfid split "
+ "barin");
+ if (xdata) {
+ ret = dict_set_sizen_str_sizen(xdata, "gfid-heal-msg",
+ SALL_BRICKS_UP_TO_RESOLVE);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_DICT_SET_FAILED,
+ "Error setting"
+ " gfid-heal-msg dict");
+ }
+ goto out;
+ }
- loc.inode = inode_ref (inode);
- gf_uuid_copy (loc.gfid, inode->gfid);
+ if (xdata) {
+ ret = dict_get_int32_sizen(xdata, "heal-op", &heal_op);
+ if (ret)
+ goto fav_child;
+ } else {
+ goto fav_child;
+ }
- STACK_WIND (frame, afr_selfheal_post_op_cbk, priv->children[subvol],
- priv->children[subvol]->fops->xattrop, &loc,
- GF_XATTROP_ADD_ARRAY, xattr, NULL);
+ switch (heal_op) {
+ case GF_SHD_OP_SBRAIN_HEAL_FROM_BIGGER_FILE:
+ *src = afr_gfid_sbrain_source_from_bigger_file(replies,
+ priv->child_count);
+ if (*src == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN,
+ SNO_BIGGER_FILE);
+ if (xdata) {
+ ret = dict_set_sizen_str_sizen(xdata, "gfid-heal-msg",
+ SNO_BIGGER_FILE);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ AFR_MSG_DICT_SET_FAILED,
+ "Error"
+ " setting gfid-heal-msg dict");
+ }
+ }
+ break;
+
+ case GF_SHD_OP_SBRAIN_HEAL_FROM_LATEST_MTIME:
+ *src = afr_gfid_sbrain_source_from_latest_mtime(replies,
+ priv->child_count);
+ if (*src == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN,
+ SNO_DIFF_IN_MTIME);
+ if (xdata) {
+ ret = dict_set_sizen_str_sizen(xdata, "gfid-heal-msg",
+ SNO_DIFF_IN_MTIME);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ AFR_MSG_DICT_SET_FAILED,
+ "Error"
+ "setting gfid-heal-msg dict");
+ }
+ }
+ break;
- syncbarrier_wait (&local->barrier, 1);
+ case GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK:
+ ret = dict_get_str_sizen(xdata, "child-name", &src_brick);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN,
+ "Error getting the source "
+ "brick");
+ break;
+ }
+ *src = afr_gfid_sbrain_source_from_src_brick(this, replies,
+ src_brick);
+ if (*src == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN,
+ SERROR_GETTING_SRC_BRICK);
+ if (xdata) {
+ ret = dict_set_sizen_str_sizen(xdata, "gfid-heal-msg",
+ SERROR_GETTING_SRC_BRICK);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ AFR_MSG_DICT_SET_FAILED,
+ "Error"
+ " setting gfid-heal-msg dict");
+ }
+ }
+ break;
- loc_wipe (&loc);
+ default:
+ break;
+ }
+ goto out;
+
+fav_child:
+ switch (priv->fav_child_policy) {
+ case AFR_FAV_CHILD_BY_SIZE:
+ *src = afr_sh_fav_by_size(this, replies, inode);
+ break;
+ case AFR_FAV_CHILD_BY_MTIME:
+ *src = afr_sh_fav_by_mtime(this, replies, inode);
+ break;
+ case AFR_FAV_CHILD_BY_CTIME:
+ *src = afr_sh_fav_by_ctime(this, replies, inode);
+ break;
+ case AFR_FAV_CHILD_BY_MAJORITY:
+ if (priv->child_count != 2)
+ *src = afr_selfheal_gfid_mismatch_by_majority(
+ replies, priv->child_count);
+ else
+ *src = -1;
+
+ if (*src == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN,
+ "No majority to resolve "
+ "gfid split brain");
+ }
+ break;
+ default:
+ break;
+ }
- return 0;
+out:
+ if (*src == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN,
+ "Gfid mismatch detected for <gfid:%s>/%s>, %s on %s and"
+ " %s on %s.",
+ uuid_utoa(pargfid), bname,
+ uuid_utoa_r(replies[child_idx].poststat.ia_gfid, g1),
+ priv->children[child_idx]->name,
+ uuid_utoa_r(replies[src_idx].poststat.ia_gfid, g2),
+ priv->children[src_idx]->name);
+ gf_event(EVENT_AFR_SPLIT_BRAIN,
+ "client-pid=%d;"
+ "subvol=%s;type=gfid;file="
+ "<gfid:%s>/%s>;count=2;child-%d=%s;gfid-%d=%s;"
+ "child-%d=%s;gfid-%d=%s",
+ this->ctx->cmd_args.client_pid, this->name, uuid_utoa(pargfid),
+ bname, child_idx, priv->children[child_idx]->name, child_idx,
+ uuid_utoa_r(replies[child_idx].poststat.ia_gfid, g1), src_idx,
+ priv->children[src_idx]->name, src_idx,
+ uuid_utoa_r(replies[src_idx].poststat.ia_gfid, g2));
+ return -1;
+ }
+ return 0;
}
+int
+afr_selfheal_post_op_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
-dict_t *
-afr_selfheal_output_xattr (xlator_t *this, afr_transaction_type type,
- int *output_dirty, int **output_matrix, int subvol)
-{
- dict_t *xattr = NULL;
- afr_private_t *priv = NULL;
- int j = 0;
- int idx = 0;
- int ret = 0;
- int *raw = 0;
-
- priv = this->private;
- idx = afr_index_for_transaction_type (type);
-
- xattr = dict_new ();
- if (!xattr)
- return NULL;
-
- /* clear dirty */
- raw = GF_CALLOC (sizeof(int), AFR_NUM_CHANGE_LOGS, gf_afr_mt_int32_t);
- if (!raw)
- goto err;
-
- raw[idx] = hton32 (output_dirty[subvol]);
- ret = dict_set_bin (xattr, AFR_DIRTY, raw,
- sizeof(int) * AFR_NUM_CHANGE_LOGS);
- if (ret)
- goto err;
-
- /* clear/set pending */
- for (j = 0; j < priv->child_count; j++) {
- raw = GF_CALLOC (sizeof(int), AFR_NUM_CHANGE_LOGS,
- gf_afr_mt_int32_t);
- if (!raw)
- goto err;
-
- raw[idx] = hton32 (output_matrix[subvol][j]);
-
- ret = dict_set_bin (xattr, priv->pending_key[j],
- raw, sizeof(int) * AFR_NUM_CHANGE_LOGS);
- if (ret)
- goto err;
- }
-
- return xattr;
-err:
- if (xattr)
- dict_unref (xattr);
- return NULL;
+ local = frame->local;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ syncbarrier_wake(&local->barrier);
+
+ return 0;
}
+int
+afr_selfheal_post_op(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ int subvol, dict_t *xattr, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ loc_t loc = {
+ 0,
+ };
+ int ret = 0;
+
+ priv = this->private;
+ local = frame->local;
+
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+
+ local->op_ret = 0;
+
+ STACK_WIND(frame, afr_selfheal_post_op_cbk, priv->children[subvol],
+ priv->children[subvol]->fops->xattrop, &loc,
+ GF_XATTROP_ADD_ARRAY, xattr, xdata);
+
+ syncbarrier_wait(&local->barrier, 1);
+ if (local->op_ret < 0)
+ ret = -local->op_errno;
+
+ loc_wipe(&loc);
+ local->op_ret = 0;
+
+ return ret;
+}
int
-afr_selfheal_undo_pending (call_frame_t *frame, xlator_t *this, inode_t *inode,
- unsigned char *sources, unsigned char *sinks,
- unsigned char *healed_sinks, afr_transaction_type type,
- struct afr_reply *replies, unsigned char *locked_on)
-{
- afr_private_t *priv = NULL;
- int i = 0;
- int j = 0;
- unsigned char *pending = NULL;
- int *input_dirty = NULL;
- int **input_matrix = NULL;
- int *output_dirty = NULL;
- int **output_matrix = NULL;
- dict_t *xattr = NULL;
-
- priv = this->private;
-
- pending = alloca0 (priv->child_count);
-
- input_dirty = alloca0 (priv->child_count * sizeof (int));
- input_matrix = ALLOC_MATRIX (priv->child_count, int);
- output_dirty = alloca0 (priv->child_count * sizeof (int));
- output_matrix = ALLOC_MATRIX (priv->child_count, int);
-
- afr_selfheal_extract_xattr (this, replies, type, input_dirty,
- input_matrix);
-
- for (i = 0; i < priv->child_count; i++)
- if (sinks[i] && !healed_sinks[i])
- pending[i] = 1;
-
- for (i = 0; i < priv->child_count; i++) {
- for (j = 0; j < priv->child_count; j++) {
- if (pending[j])
- output_matrix[i][j] = 1;
- else
- output_matrix[i][j] = -input_matrix[i][j];
- }
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (!pending[i])
- output_dirty[i] = -input_dirty[i];
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (!locked_on[i])
- /* perform post-op only on subvols we had locked
- and inspected on.
- */
- continue;
-
- xattr = afr_selfheal_output_xattr (this, type, output_dirty,
- output_matrix, i);
- if (!xattr) {
- gf_log (this->name, GF_LOG_ERROR,
- "unable to allocate xdata for subvol %d", i);
- continue;
- }
-
- afr_selfheal_post_op (frame, this, inode, i, xattr);
-
- dict_unref (xattr);
- }
-
- return 0;
+afr_check_stale_error(struct afr_reply *replies, afr_private_t *priv)
+{
+ int i = 0;
+ int op_errno = 0;
+ int tmp_errno = 0;
+ int stale_count = 0;
+
+ for (i = 0; i < priv->child_count; i++) {
+ tmp_errno = replies[i].op_errno;
+ if (tmp_errno == ENOENT || tmp_errno == ESTALE) {
+ op_errno = afr_higher_errno(op_errno, tmp_errno);
+ stale_count++;
+ }
+ }
+ if (stale_count != priv->child_count)
+ return -ENOTCONN;
+ else
+ return -op_errno;
}
+int
+afr_sh_generic_fop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
+{
+ int i = (long)cookie;
+ afr_local_t *local = NULL;
-void
-afr_replies_copy (struct afr_reply *dst, struct afr_reply *src, int count)
-{
- int i = 0;
- dict_t *xdata = NULL;
-
- if (dst == src)
- return;
-
- for (i = 0; i < count; i++) {
- dst[i].valid = src[i].valid;
- dst[i].op_ret = src[i].op_ret;
- dst[i].op_errno = src[i].op_errno;
- dst[i].prestat = src[i].prestat;
- dst[i].poststat = src[i].poststat;
- dst[i].preparent = src[i].preparent;
- dst[i].postparent = src[i].postparent;
- dst[i].preparent2 = src[i].preparent2;
- dst[i].postparent2 = src[i].postparent2;
- if (src[i].xdata)
- xdata = dict_ref (src[i].xdata);
- else
- xdata = NULL;
- if (dst[i].xdata)
- dict_unref (dst[i].xdata);
- dst[i].xdata = xdata;
- memcpy (dst[i].checksum, src[i].checksum,
- MD5_DIGEST_LENGTH);
- }
+ local = frame->local;
+
+ local->replies[i].valid = 1;
+ local->replies[i].op_ret = op_ret;
+ local->replies[i].op_errno = op_errno;
+ if (pre)
+ local->replies[i].prestat = *pre;
+ if (post)
+ local->replies[i].poststat = *post;
+ if (xdata)
+ local->replies[i].xdata = dict_ref(xdata);
+
+ syncbarrier_wake(&local->barrier);
+
+ return 0;
}
+int
+afr_selfheal_restore_time(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ int source, unsigned char *healed_sinks,
+ struct afr_reply *replies)
+{
+ loc_t loc = {
+ 0,
+ };
+
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+
+ AFR_ONLIST(healed_sinks, frame, afr_sh_generic_fop_cbk, setattr, &loc,
+ &replies[source].poststat,
+ (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME | GF_SET_ATTR_CTIME),
+ NULL);
+
+ loc_wipe(&loc);
+
+ return 0;
+}
+
+dict_t *
+afr_selfheal_output_xattr(xlator_t *this, gf_boolean_t is_full_crawl,
+ afr_transaction_type type, int *output_dirty,
+ int **output_matrix, int subvol,
+ int **full_heal_mtx_out)
+{
+ int j = 0;
+ int idx = 0;
+ int d_idx = 0;
+ int ret = 0;
+ int *raw = 0;
+ dict_t *xattr = NULL;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ idx = afr_index_for_transaction_type(type);
+ d_idx = afr_index_for_transaction_type(AFR_DATA_TRANSACTION);
+
+ xattr = dict_new();
+ if (!xattr)
+ return NULL;
+
+ /* clear dirty */
+ raw = GF_CALLOC(sizeof(int), AFR_NUM_CHANGE_LOGS, gf_afr_mt_int32_t);
+ if (!raw)
+ goto err;
+
+ raw[idx] = hton32(output_dirty[subvol]);
+ ret = dict_set_bin(xattr, AFR_DIRTY, raw,
+ sizeof(int) * AFR_NUM_CHANGE_LOGS);
+ if (ret) {
+ GF_FREE(raw);
+ goto err;
+ }
+
+ /* clear/set pending */
+ for (j = 0; j < priv->child_count; j++) {
+ raw = GF_CALLOC(sizeof(int), AFR_NUM_CHANGE_LOGS, gf_afr_mt_int32_t);
+ if (!raw)
+ goto err;
+
+ raw[idx] = hton32(output_matrix[subvol][j]);
+ if (is_full_crawl)
+ raw[d_idx] = hton32(full_heal_mtx_out[subvol][j]);
+
+ ret = dict_set_bin(xattr, priv->pending_key[j], raw,
+ sizeof(int) * AFR_NUM_CHANGE_LOGS);
+ if (ret) {
+ GF_FREE(raw);
+ goto err;
+ }
+ }
+
+ return xattr;
+err:
+ if (xattr)
+ dict_unref(xattr);
+ return NULL;
+}
int
-afr_selfheal_fill_dirty (xlator_t *this, int *dirty, int subvol,
- int idx, dict_t *xdata)
+afr_selfheal_undo_pending(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ unsigned char *sources, unsigned char *sinks,
+ unsigned char *healed_sinks,
+ unsigned char *undid_pending,
+ afr_transaction_type type, struct afr_reply *replies,
+ unsigned char *locked_on)
{
- void *pending_raw = NULL;
- int pending[3] = {0, };
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int i = 0;
+ int j = 0;
+ unsigned char *pending = NULL;
+ int *input_dirty = NULL;
+ int **input_matrix = NULL;
+ int **full_heal_mtx_in = NULL;
+ int **full_heal_mtx_out = NULL;
+ int *output_dirty = NULL;
+ int **output_matrix = NULL;
+ dict_t *xattr = NULL;
+ dict_t *xdata = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ pending = alloca0(priv->child_count);
+
+ input_dirty = alloca0(priv->child_count * sizeof(int));
+ input_matrix = ALLOC_MATRIX(priv->child_count, int);
+ full_heal_mtx_in = ALLOC_MATRIX(priv->child_count, int);
+ full_heal_mtx_out = ALLOC_MATRIX(priv->child_count, int);
+ output_dirty = alloca0(priv->child_count * sizeof(int));
+ output_matrix = ALLOC_MATRIX(priv->child_count, int);
+
+ xdata = dict_new();
+ if (!xdata)
+ return -1;
+
+ afr_selfheal_extract_xattr(this, replies, type, input_dirty, input_matrix);
+
+ if (local->need_full_crawl)
+ afr_selfheal_extract_xattr(this, replies, AFR_DATA_TRANSACTION, NULL,
+ full_heal_mtx_in);
+
+ for (i = 0; i < priv->child_count; i++)
+ if (sinks[i] && !healed_sinks[i])
+ pending[i] = 1;
+
+ for (i = 0; i < priv->child_count; i++) {
+ for (j = 0; j < priv->child_count; j++) {
+ if (pending[j]) {
+ output_matrix[i][j] = 1;
+ if (type == AFR_ENTRY_TRANSACTION)
+ full_heal_mtx_out[i][j] = 1;
+ } else if (locked_on[j]) {
+ output_matrix[i][j] = -input_matrix[i][j];
+ if (type == AFR_ENTRY_TRANSACTION)
+ full_heal_mtx_out[i][j] = -full_heal_mtx_in[i][j];
+ }
+ }
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!pending[i])
+ output_dirty[i] = -input_dirty[i];
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!locked_on[i])
+ /* perform post-op only on subvols we had locked
+ and inspected on.
+ */
+ continue;
+ if (undid_pending[i])
+ /* We already unset the pending xattrs in
+ * _afr_fav_child_reset_sink_xattrs(). */
+ continue;
+
+ xattr = afr_selfheal_output_xattr(this, local->need_full_crawl, type,
+ output_dirty, output_matrix, i,
+ full_heal_mtx_out);
+ if (!xattr) {
+ continue;
+ }
- if (dict_get_ptr (xdata, AFR_DIRTY, &pending_raw))
- return -1;
+ if ((type == AFR_ENTRY_TRANSACTION) && (priv->esh_granular)) {
+ if (xdata && dict_set_int8(xdata, GF_XATTROP_PURGE_INDEX, 1))
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_DICT_SET_FAILED,
+ "Failed to set"
+ " dict value for %s",
+ GF_XATTROP_PURGE_INDEX);
+ }
- if (!pending_raw)
- return -1;
+ afr_selfheal_post_op(frame, this, inode, i, xattr, xdata);
+ dict_unref(xattr);
+ }
- memcpy (pending, pending_raw, sizeof(pending));
+ if (xdata)
+ dict_unref(xdata);
- dirty[subvol] = ntoh32 (pending[idx]);
+ return 0;
+}
- return 0;
+void
+afr_reply_copy(struct afr_reply *dst, struct afr_reply *src)
+{
+ dict_t *xdata = NULL;
+
+ dst->valid = src->valid;
+ dst->op_ret = src->op_ret;
+ dst->op_errno = src->op_errno;
+ dst->prestat = src->prestat;
+ dst->poststat = src->poststat;
+ dst->preparent = src->preparent;
+ dst->postparent = src->postparent;
+ dst->preparent2 = src->preparent2;
+ dst->postparent2 = src->postparent2;
+ if (src->xdata)
+ xdata = dict_ref(src->xdata);
+ else
+ xdata = NULL;
+ if (dst->xdata)
+ dict_unref(dst->xdata);
+ dst->xdata = xdata;
+ if (xdata && dict_get_str_boolean(xdata, "fips-mode-rchecksum",
+ _gf_false) == _gf_true) {
+ memcpy(dst->checksum, src->checksum, SHA256_DIGEST_LENGTH);
+ } else {
+ memcpy(dst->checksum, src->checksum, MD5_DIGEST_LENGTH);
+ }
+ dst->fips_mode_rchecksum = src->fips_mode_rchecksum;
}
+void
+afr_replies_copy(struct afr_reply *dst, struct afr_reply *src, int count)
+{
+ int i = 0;
+
+ if (dst == src)
+ return;
+
+ for (i = 0; i < count; i++) {
+ afr_reply_copy(&dst[i], &src[i]);
+ }
+}
int
-afr_selfheal_fill_matrix (xlator_t *this, int **matrix, int subvol,
- int idx, dict_t *xdata)
+afr_selfheal_fill_dirty(xlator_t *this, int *dirty, int subvol, int idx,
+ dict_t *xdata)
{
- int i = 0;
- void *pending_raw = NULL;
- int pending[3] = {0, };
- afr_private_t *priv = NULL;
+ void *pending_raw = NULL;
+ int pending[3] = {
+ 0,
+ };
- priv = this->private;
+ if (!dirty)
+ return 0;
- for (i = 0; i < priv->child_count; i++) {
- if (dict_get_ptr (xdata, priv->pending_key[i], &pending_raw))
- continue;
+ if (dict_get_ptr(xdata, AFR_DIRTY, &pending_raw))
+ return -1;
- if (!pending_raw)
- continue;
+ if (!pending_raw)
+ return -1;
- memcpy (pending, pending_raw, sizeof(pending));
+ memcpy(pending, pending_raw, sizeof(pending));
- matrix[subvol][i] = ntoh32 (pending[idx]);
- }
+ dirty[subvol] = ntoh32(pending[idx]);
- return 0;
+ return 0;
}
+int
+afr_selfheal_fill_matrix(xlator_t *this, int **matrix, int subvol, int idx,
+ dict_t *xdata)
+{
+ int i = 0;
+ void *pending_raw = NULL;
+ int pending[3] = {
+ 0,
+ };
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+
+ if (!matrix)
+ return 0;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (dict_get_ptr(xdata, priv->pending_key[i], &pending_raw))
+ continue;
+
+ if (!pending_raw)
+ continue;
+
+ memcpy(pending, pending_raw, sizeof(pending));
+
+ matrix[subvol][i] = ntoh32(pending[idx]);
+ }
+
+ return 0;
+}
int
-afr_selfheal_extract_xattr (xlator_t *this, struct afr_reply *replies,
- afr_transaction_type type, int *dirty, int **matrix)
+afr_selfheal_extract_xattr(xlator_t *this, struct afr_reply *replies,
+ afr_transaction_type type, int *dirty, int **matrix)
{
- afr_private_t *priv = NULL;
- int i = 0;
- dict_t *xdata = NULL;
- int idx = -1;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ dict_t *xdata = NULL;
+ int idx = -1;
+
+ idx = afr_index_for_transaction_type(type);
- idx = afr_index_for_transaction_type (type);
+ priv = this->private;
- priv = this->private;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret != 0)
+ continue;
- for (i = 0; i < priv->child_count; i++) {
- if (!replies[i].xdata)
- continue;
+ if (!replies[i].xdata)
+ continue;
- xdata = replies[i].xdata;
+ xdata = replies[i].xdata;
- afr_selfheal_fill_dirty (this, dirty, i, idx, xdata);
- afr_selfheal_fill_matrix (this, matrix, i, idx, xdata);
- }
+ afr_selfheal_fill_dirty(this, dirty, i, idx, xdata);
+ afr_selfheal_fill_matrix(this, matrix, i, idx, xdata);
+ }
- return 0;
+ return 0;
}
/*
@@ -294,70 +819,566 @@ afr_selfheal_extract_xattr (xlator_t *this, struct afr_reply *replies,
* This can happen if data was directly modified in the backend or for snapshots
*/
void
-afr_mark_largest_file_as_source (xlator_t *this, unsigned char *sources,
- struct afr_reply *replies)
+afr_mark_largest_file_as_source(xlator_t *this, unsigned char *sources,
+ struct afr_reply *replies)
{
- int i = 0;
- afr_private_t *priv = NULL;
- uint64_t size = 0;
-
- /* Find source with biggest file size */
- priv = this->private;
- for (i = 0; i < priv->child_count; i++) {
- if (!sources[i])
- continue;
- if (size <= replies[i].poststat.ia_size) {
- size = replies[i].poststat.ia_size;
- }
+ int i = 0;
+ afr_private_t *priv = NULL;
+ uint64_t size = 0;
+
+ /* Find source with biggest file size */
+ priv = this->private;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i])
+ continue;
+ if (!replies[i].valid || replies[i].op_ret != 0) {
+ sources[i] = 0;
+ continue;
+ }
+ if (size <= replies[i].poststat.ia_size) {
+ size = replies[i].poststat.ia_size;
}
+ }
+
+ /* Mark sources with less size as not source */
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i])
+ continue;
+ if (size > replies[i].poststat.ia_size)
+ sources[i] = 0;
+ }
+}
- /* Mark sources with less size as not source */
- for (i = 0; i < priv->child_count; i++) {
- if (!sources[i])
- continue;
- if (size > replies[i].poststat.ia_size)
- sources[i] = 0;
+void
+afr_mark_latest_mtime_file_as_source(xlator_t *this, unsigned char *sources,
+ struct afr_reply *replies)
+{
+ int i = 0;
+ afr_private_t *priv = NULL;
+ uint32_t mtime = 0;
+ uint32_t mtime_nsec = 0;
+
+ priv = this->private;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i])
+ continue;
+ if (!replies[i].valid || replies[i].op_ret != 0) {
+ sources[i] = 0;
+ continue;
+ }
+ if ((mtime < replies[i].poststat.ia_mtime) ||
+ ((mtime == replies[i].poststat.ia_mtime) &&
+ (mtime_nsec < replies[i].poststat.ia_mtime_nsec))) {
+ mtime = replies[i].poststat.ia_mtime;
+ mtime_nsec = replies[i].poststat.ia_mtime_nsec;
+ }
+ }
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i])
+ continue;
+ if ((mtime > replies[i].poststat.ia_mtime) ||
+ ((mtime == replies[i].poststat.ia_mtime) &&
+ (mtime_nsec > replies[i].poststat.ia_mtime_nsec))) {
+ sources[i] = 0;
}
+ }
}
void
-afr_mark_active_sinks (xlator_t *this, unsigned char *sources,
- unsigned char *locked_on, unsigned char *sinks)
+afr_mark_active_sinks(xlator_t *this, unsigned char *sources,
+ unsigned char *locked_on, unsigned char *sinks)
{
- int i = 0;
- afr_private_t *priv = NULL;
+ int i = 0;
+ afr_private_t *priv = NULL;
- priv = this->private;
+ priv = this->private;
- memset (sinks, 0, sizeof (*sinks) * priv->child_count);
- for (i = 0; i < priv->child_count; i++) {
- if (!sources[i] && locked_on[i])
- sinks[i] = 1;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i] && locked_on[i])
+ sinks[i] = 1;
+ else
+ sinks[i] = 0;
+ }
+}
+
+gf_boolean_t
+afr_dict_contains_heal_op(call_frame_t *frame)
+{
+ afr_local_t *local = NULL;
+ dict_t *xdata_req = NULL;
+ int ret = 0;
+ int heal_op = -1;
+
+ local = frame->local;
+ xdata_req = local->xdata_req;
+ ret = dict_get_int32_sizen(xdata_req, "heal-op", &heal_op);
+ if (ret)
+ return _gf_false;
+ if (local->xdata_rsp == NULL) {
+ local->xdata_rsp = dict_new();
+ if (!local->xdata_rsp)
+ return _gf_true;
+ }
+ ret = dict_set_sizen_str_sizen(local->xdata_rsp, "sh-fail-msg",
+ SFILE_NOT_IN_SPLIT_BRAIN);
+
+ return _gf_true;
+}
+
+gf_boolean_t
+afr_can_decide_split_brain_source_sinks(struct afr_reply *replies,
+ int child_count)
+{
+ int i = 0;
+
+ for (i = 0; i < child_count; i++)
+ if (replies[i].valid != 1 || replies[i].op_ret != 0)
+ return _gf_false;
+
+ return _gf_true;
+}
+
+int
+afr_mark_split_brain_source_sinks_by_heal_op(
+ call_frame_t *frame, xlator_t *this, unsigned char *sources,
+ unsigned char *sinks, unsigned char *healed_sinks, unsigned char *locked_on,
+ struct afr_reply *replies, afr_transaction_type type, int heal_op)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ dict_t *xdata_req = NULL;
+ dict_t *xdata_rsp = NULL;
+ int ret = 0;
+ int i = 0;
+ char *name = NULL;
+ int source = -1;
+
+ local = frame->local;
+ priv = this->private;
+ xdata_req = local->xdata_req;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (locked_on[i])
+ if (sources[i] || !sinks[i] || !healed_sinks[i]) {
+ ret = -1;
+ goto out;
+ }
+ }
+ if (local->xdata_rsp == NULL) {
+ local->xdata_rsp = dict_new();
+ if (!local->xdata_rsp) {
+ ret = -1;
+ goto out;
+ }
+ }
+ xdata_rsp = local->xdata_rsp;
+
+ if (!afr_can_decide_split_brain_source_sinks(replies, priv->child_count)) {
+ ret = dict_set_sizen_str_sizen(xdata_rsp, "sh-fail-msg",
+ SBRAIN_HEAL_NO_GO_MSG);
+ ret = -1;
+ goto out;
+ }
+
+ for (i = 0; i < priv->child_count; i++)
+ if (locked_on[i])
+ sources[i] = 1;
+ switch (heal_op) {
+ case GF_SHD_OP_SBRAIN_HEAL_FROM_BIGGER_FILE:
+ if (type == AFR_METADATA_TRANSACTION) {
+ ret = dict_set_sizen_str_sizen(xdata_rsp, "sh-fail-msg",
+ SUSE_SOURCE_BRICK_TO_HEAL);
+ if (!ret)
+ ret = -1;
+ goto out;
+ }
+ afr_mark_largest_file_as_source(this, sources, replies);
+ if (AFR_COUNT(sources, priv->child_count) != 1) {
+ ret = dict_set_sizen_str_sizen(xdata_rsp, "sh-fail-msg",
+ SNO_BIGGER_FILE);
+ if (!ret)
+ ret = -1;
+ goto out;
+ }
+ break;
+ case GF_SHD_OP_SBRAIN_HEAL_FROM_LATEST_MTIME:
+ if (type == AFR_METADATA_TRANSACTION) {
+ ret = dict_set_sizen_str_sizen(xdata_rsp, "sh-fail-msg",
+ SUSE_SOURCE_BRICK_TO_HEAL);
+ if (!ret)
+ ret = -1;
+ goto out;
+ }
+ afr_mark_latest_mtime_file_as_source(this, sources, replies);
+ if (AFR_COUNT(sources, priv->child_count) != 1) {
+ ret = dict_set_sizen_str_sizen(xdata_rsp, "sh-fail-msg",
+ SNO_DIFF_IN_MTIME);
+ if (!ret)
+ ret = -1;
+ goto out;
+ }
+ break;
+ case GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK:
+ ret = dict_get_str_sizen(xdata_req, "child-name", &name);
+ if (ret)
+ goto out;
+ source = afr_get_child_index_from_name(this, name);
+ if (source < 0) {
+ ret = dict_set_sizen_str_sizen(xdata_rsp, "sh-fail-msg",
+ SINVALID_BRICK_NAME);
+ if (!ret)
+ ret = -1;
+ goto out;
+ }
+ if (locked_on[source] != 1) {
+ ret = dict_set_sizen_str_sizen(xdata_rsp, "sh-fail-msg",
+ SBRICK_IS_NOT_UP);
+ if (!ret)
+ ret = -1;
+ goto out;
+ }
+ memset(sources, 0, sizeof(*sources) * priv->child_count);
+ sources[source] = 1;
+ break;
+ default:
+ ret = -1;
+ goto out;
+ }
+ for (i = 0; i < priv->child_count; i++) {
+ if (sources[i]) {
+ source = i;
+ break;
+ }
+ }
+ sinks[source] = 0;
+ healed_sinks[source] = 0;
+ ret = source;
+out:
+ if (ret < 0)
+ memset(sources, 0, sizeof(*sources) * priv->child_count);
+ return ret;
+}
+
+int
+afr_sh_fav_by_majority(xlator_t *this, struct afr_reply *replies,
+ inode_t *inode)
+{
+ afr_private_t *priv;
+ int vote_count = -1;
+ int fav_child = -1;
+ int i = 0;
+ int k = 0;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (replies[i].valid == 1) {
+ gf_msg_debug(this->name, 0,
+ "Child:%s mtime_sec = %" PRId64 ", size = %" PRIu64
+ " for gfid %s",
+ priv->children[i]->name, replies[i].poststat.ia_mtime,
+ replies[i].poststat.ia_size, uuid_utoa(inode->gfid));
+ vote_count = 0;
+ for (k = 0; k < priv->child_count; k++) {
+ if ((replies[k].poststat.ia_mtime ==
+ replies[i].poststat.ia_mtime) &&
+ (replies[k].poststat.ia_size ==
+ replies[i].poststat.ia_size)) {
+ vote_count++;
+ }
+ }
+ if (vote_count > priv->child_count / 2) {
+ fav_child = i;
+ break;
+ }
+ }
+ }
+ return fav_child;
+}
+
+/*
+ * afr_sh_fav_by_mtime: Choose favorite child by mtime.
+ */
+int
+afr_sh_fav_by_mtime(xlator_t *this, struct afr_reply *replies, inode_t *inode)
+{
+ afr_private_t *priv;
+ int fav_child = -1;
+ int i = 0;
+ uint32_t cmp_mtime = 0;
+ uint32_t cmp_mtime_nsec = 0;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (replies[i].valid == 1) {
+ gf_msg_debug(this->name, 0,
+ "Child:%s mtime = %" PRId64
+ ", mtime_nsec = %d for "
+ "gfid %s",
+ priv->children[i]->name, replies[i].poststat.ia_mtime,
+ replies[i].poststat.ia_mtime_nsec,
+ uuid_utoa(inode->gfid));
+ if (replies[i].poststat.ia_mtime > cmp_mtime) {
+ cmp_mtime = replies[i].poststat.ia_mtime;
+ cmp_mtime_nsec = replies[i].poststat.ia_mtime_nsec;
+ fav_child = i;
+ } else if ((replies[i].poststat.ia_mtime == cmp_mtime) &&
+ (replies[i].poststat.ia_mtime_nsec > cmp_mtime_nsec)) {
+ cmp_mtime = replies[i].poststat.ia_mtime;
+ cmp_mtime_nsec = replies[i].poststat.ia_mtime_nsec;
+ fav_child = i;
+ }
+ }
+ }
+ return fav_child;
+}
+
+/*
+ * afr_sh_fav_by_ctime: Choose favorite child by ctime.
+ */
+int
+afr_sh_fav_by_ctime(xlator_t *this, struct afr_reply *replies, inode_t *inode)
+{
+ afr_private_t *priv;
+ int fav_child = -1;
+ int i = 0;
+ uint32_t cmp_ctime = 0;
+ uint32_t cmp_ctime_nsec = 0;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (replies[i].valid == 1) {
+ gf_msg_debug(this->name, 0,
+ "Child:%s ctime = %" PRId64
+ ", ctime_nsec = %d for "
+ "gfid %s",
+ priv->children[i]->name, replies[i].poststat.ia_ctime,
+ replies[i].poststat.ia_ctime_nsec,
+ uuid_utoa(inode->gfid));
+ if (replies[i].poststat.ia_ctime > cmp_ctime) {
+ cmp_ctime = replies[i].poststat.ia_ctime;
+ cmp_ctime_nsec = replies[i].poststat.ia_ctime_nsec;
+ fav_child = i;
+ } else if ((replies[i].poststat.ia_ctime == cmp_ctime) &&
+ (replies[i].poststat.ia_ctime_nsec > cmp_ctime_nsec)) {
+ cmp_ctime = replies[i].poststat.ia_ctime;
+ cmp_ctime_nsec = replies[i].poststat.ia_ctime_nsec;
+ fav_child = i;
+ }
}
+ }
+ return fav_child;
+}
+
+/*
+ * afr_sh_fav_by_size: Choose favorite child by size
+ * when not all files are of zero size.
+ */
+int
+afr_sh_fav_by_size(xlator_t *this, struct afr_reply *replies, inode_t *inode)
+{
+ afr_private_t *priv;
+ int fav_child = -1;
+ int i = 0;
+ uint64_t cmp_sz = 0;
+
+ priv = this->private;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid) {
+ continue;
+ }
+ gf_msg_debug(this->name, 0,
+ "Child:%s file size = %" PRIu64 " for gfid %s",
+ priv->children[i]->name, replies[i].poststat.ia_size,
+ uuid_utoa(inode->gfid));
+ if (replies[i].poststat.ia_type == IA_IFDIR) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SBRAIN_FAV_CHILD_POLICY,
+ "Cannot perform selfheal on %s. "
+ "Size policy is not applicable to directories.",
+ uuid_utoa(inode->gfid));
+ break;
+ }
+ if (replies[i].poststat.ia_size > cmp_sz) {
+ cmp_sz = replies[i].poststat.ia_size;
+ fav_child = i;
+ } else if (replies[i].poststat.ia_size == cmp_sz) {
+ fav_child = -1;
+ }
+ }
+ if (fav_child == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN,
+ "No bigger file");
+ }
+ return fav_child;
+}
+
+int
+afr_sh_get_fav_by_policy(xlator_t *this, struct afr_reply *replies,
+ inode_t *inode, char **policy_str)
+{
+ afr_private_t *priv = NULL;
+ int fav_child = -1;
+
+ priv = this->private;
+ if (!afr_can_decide_split_brain_source_sinks(replies, priv->child_count)) {
+ return -1;
+ }
+
+ switch (priv->fav_child_policy) {
+ case AFR_FAV_CHILD_BY_SIZE:
+ fav_child = afr_sh_fav_by_size(this, replies, inode);
+ if (policy_str && fav_child >= 0) {
+ *policy_str = "SIZE";
+ }
+ break;
+ case AFR_FAV_CHILD_BY_CTIME:
+ fav_child = afr_sh_fav_by_ctime(this, replies, inode);
+ if (policy_str && fav_child >= 0) {
+ *policy_str = "CTIME";
+ }
+ break;
+ case AFR_FAV_CHILD_BY_MTIME:
+ fav_child = afr_sh_fav_by_mtime(this, replies, inode);
+ if (policy_str && fav_child >= 0) {
+ *policy_str = "MTIME";
+ }
+ break;
+ case AFR_FAV_CHILD_BY_MAJORITY:
+ fav_child = afr_sh_fav_by_majority(this, replies, inode);
+ if (policy_str && fav_child >= 0) {
+ *policy_str = "MAJORITY";
+ }
+ break;
+ case AFR_FAV_CHILD_NONE:
+ default:
+ break;
+ }
+
+ return fav_child;
+}
+
+int
+afr_mark_split_brain_source_sinks_by_policy(
+ call_frame_t *frame, xlator_t *this, inode_t *inode, unsigned char *sources,
+ unsigned char *sinks, unsigned char *healed_sinks, unsigned char *locked_on,
+ struct afr_reply *replies, afr_transaction_type type)
+{
+ afr_private_t *priv = NULL;
+ int fav_child = -1;
+ char mtime_str[256];
+ char ctime_str[256];
+ char *policy_str = NULL;
+ struct tm *tm_ptr;
+ time_t time;
+
+ priv = this->private;
+
+ fav_child = afr_sh_get_fav_by_policy(this, replies, inode, &policy_str);
+ if (fav_child == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SBRAIN_FAV_CHILD_POLICY,
+ "No child selected by favorite-child policy.");
+ } else if (fav_child > priv->child_count - 1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SBRAIN_FAV_CHILD_POLICY,
+ "Invalid child (%d) "
+ "selected by policy %s.",
+ fav_child, policy_str);
+ } else if (fav_child >= 0) {
+ time = replies[fav_child].poststat.ia_mtime;
+ tm_ptr = localtime(&time);
+ strftime(mtime_str, sizeof(mtime_str), "%Y-%m-%d %H:%M:%S", tm_ptr);
+ time = replies[fav_child].poststat.ia_ctime;
+ tm_ptr = localtime(&time);
+ strftime(ctime_str, sizeof(ctime_str), "%Y-%m-%d %H:%M:%S", tm_ptr);
+
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_SBRAIN_FAV_CHILD_POLICY,
+ "Source %s selected as authentic to resolve conflicting data "
+ "in file (gfid:%s) by %s (%" PRIu64
+ " bytes @ %s mtime, %s "
+ "ctime).",
+ priv->children[fav_child]->name, uuid_utoa(inode->gfid),
+ policy_str, replies[fav_child].poststat.ia_size, mtime_str,
+ ctime_str);
+
+ sources[fav_child] = 1;
+ sinks[fav_child] = 0;
+ healed_sinks[fav_child] = 0;
+ }
+ return fav_child;
}
gf_boolean_t
-afr_dict_contains_heal_op (call_frame_t *frame)
+afr_is_file_empty_on_all_children(afr_private_t *priv,
+ struct afr_reply *replies)
{
- afr_local_t *local = NULL;
- dict_t *xdata_req = NULL;
- int ret = 0;
- int heal_op = -1;
+ int i = 0;
- local = frame->local;
- xdata_req = local->xdata_req;
- ret = dict_get_int32 (xdata_req, "heal-op", &heal_op);
- if (ret)
- return _gf_false;
- if (local->xdata_rsp == NULL) {
- local->xdata_rsp = dict_new();
- if (!local->xdata_rsp)
- return _gf_true;
+ for (i = 0; i < priv->child_count; i++) {
+ if ((!replies[i].valid) || (replies[i].op_ret != 0) ||
+ (replies[i].poststat.ia_size != 0))
+ return _gf_false;
+ }
+
+ return _gf_true;
+}
+
+int
+afr_mark_source_sinks_if_file_empty(xlator_t *this, unsigned char *sources,
+ unsigned char *sinks,
+ unsigned char *healed_sinks,
+ unsigned char *locked_on,
+ struct afr_reply *replies,
+ afr_transaction_type type)
+{
+ int source = -1;
+ int i = 0;
+ afr_private_t *priv = this->private;
+ struct iatt stbuf = {
+ 0,
+ };
+
+ if ((AFR_COUNT(locked_on, priv->child_count) < priv->child_count) ||
+ (afr_success_count(replies, priv->child_count) < priv->child_count))
+ return -1;
+
+ if (type == AFR_DATA_TRANSACTION) {
+ if (!afr_is_file_empty_on_all_children(priv, replies))
+ return -1;
+ goto mark;
+ }
+
+ /*For AFR_METADATA_TRANSACTION, metadata must be same on all bricks.*/
+ stbuf = replies[0].poststat;
+ for (i = 1; i < priv->child_count; i++) {
+ if ((!IA_EQUAL(stbuf, replies[i].poststat, type)) ||
+ (!IA_EQUAL(stbuf, replies[i].poststat, uid)) ||
+ (!IA_EQUAL(stbuf, replies[i].poststat, gid)) ||
+ (!IA_EQUAL(stbuf, replies[i].poststat, prot)))
+ return -1;
+ }
+ for (i = 1; i < priv->child_count; i++) {
+ if (!afr_xattrs_are_equal(replies[0].xdata, replies[i].xdata))
+ return -1;
+ }
+
+mark:
+ /* data/metadata is same on all bricks. Pick one of them as source. Rest
+ * are sinks.*/
+ for (i = 0; i < priv->child_count; i++) {
+ if (source == -1) {
+ source = i;
+ sources[i] = 1;
+ sinks[i] = 0;
+ healed_sinks[i] = 0;
+ continue;
}
- ret = dict_set_str (local->xdata_rsp, "sh-fail-msg",
- "File not in split-brain");
+ sources[i] = 0;
+ sinks[i] = 1;
+ healed_sinks[i] = 1;
+ }
- return _gf_true;
+ return source;
}
/* Return a source depending on the type of heal_op, and set sources[source],
@@ -368,121 +1389,156 @@ afr_dict_contains_heal_op (call_frame_t *frame)
* sinks[node] are 1. This should be the case if the file is in split-brain.
*/
int
-afr_mark_split_brain_source_sinks (call_frame_t *frame, xlator_t *this,
- unsigned char *sources,
- unsigned char *sinks,
- unsigned char *healed_sinks,
- unsigned char *locked_on,
- struct afr_reply *replies,
- afr_transaction_type type)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- dict_t *xdata_req = NULL;
- dict_t *xdata_rsp = NULL;
- int ret = 0;
- int heal_op = -1;
- int i = 0;
- char *name = NULL;
- int source = -1;
-
- local = frame->local;
- priv = this->private;
- xdata_req = local->xdata_req;
-
- ret = dict_get_int32 (xdata_req, "heal-op", &heal_op);
- if (ret)
- goto out;
-
- for (i = 0; i < priv->child_count; i++) {
- if (locked_on[i])
- if (sources[i] || !sinks[i] || !healed_sinks[i]) {
- ret = -1;
- goto out;
- }
- }
- if (local->xdata_rsp == NULL) {
- local->xdata_rsp = dict_new();
- if (!local->xdata_rsp) {
- ret = -1;
- goto out;
- }
+afr_mark_split_brain_source_sinks(
+ call_frame_t *frame, xlator_t *this, inode_t *inode, unsigned char *sources,
+ unsigned char *sinks, unsigned char *healed_sinks, unsigned char *locked_on,
+ struct afr_reply *replies, afr_transaction_type type)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ dict_t *xdata_req = NULL;
+ int heal_op = -1;
+ int ret = -1;
+ int source = -1;
+
+ local = frame->local;
+ priv = this->private;
+ xdata_req = local->xdata_req;
+
+ source = afr_mark_source_sinks_if_file_empty(
+ this, sources, sinks, healed_sinks, locked_on, replies, type);
+ if (source >= 0)
+ return source;
+
+ ret = dict_get_int32_sizen(xdata_req, "heal-op", &heal_op);
+ if (ret)
+ goto autoheal;
+
+ source = afr_mark_split_brain_source_sinks_by_heal_op(
+ frame, this, sources, sinks, healed_sinks, locked_on, replies, type,
+ heal_op);
+ return source;
+
+autoheal:
+ /* Automatically heal if fav_child_policy is set. */
+ if (priv->fav_child_policy != AFR_FAV_CHILD_NONE) {
+ source = afr_mark_split_brain_source_sinks_by_policy(
+ frame, this, inode, sources, sinks, healed_sinks, locked_on,
+ replies, type);
+ if (source != -1) {
+ ret = dict_set_int32_sizen(xdata_req, "fav-child-policy", 1);
+ if (ret)
+ return -1;
}
- xdata_rsp = local->xdata_rsp;
+ }
- switch (heal_op) {
- case GF_SHD_OP_SBRAIN_HEAL_FROM_BIGGER_FILE:
- if (type == AFR_METADATA_TRANSACTION) {
- ret = dict_set_str (xdata_rsp, "sh-fail-msg",
- "Use source-brick option to"
- " heal metadata split-brain");
- if (!ret)
- ret = -1;
- goto out;
- }
- for (i = 0 ; i < priv->child_count; i++)
- if (locked_on[i])
- sources[i] = 1;
- afr_mark_largest_file_as_source (this, sources, replies);
- if (AFR_COUNT (sources, priv->child_count) != 1) {
- ret = dict_set_str (xdata_rsp, "sh-fail-msg",
- "No bigger file");
- if (!ret)
- ret = -1;
- goto out;
- }
- for (i = 0 ; i < priv->child_count; i++)
- if (sources[i])
- source = i;
- sinks[source] = 0;
- healed_sinks[source] = 0;
- break;
- case GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK:
- ret = dict_get_str (xdata_req, "child-name", &name);
- if (ret)
- goto out;
- source = afr_get_child_index_from_name (this, name);
- if (source < 0) {
- ret = dict_set_str (xdata_rsp, "sh-fail-msg",
- "Invalid brick name");
- if (!ret)
- ret = -1;
- goto out;
- }
- if (locked_on[source] != 1) {
- ret = dict_set_str (xdata_rsp, "sh-fail-msg",
- "Brick is not up");
- if (!ret)
- ret = -1;
- goto out;
- }
- sources[source] = 1;
- sinks[source] = 0;
- healed_sinks[source] = 0;
- break;
- default:
- ret = -1;
- goto out;
- }
- ret = source;
-out:
- return ret;
+ return source;
+}
+int
+_afr_fav_child_reset_sink_xattrs(call_frame_t *frame, xlator_t *this,
+ inode_t *inode, int source,
+ unsigned char *healed_sinks,
+ unsigned char *undid_pending,
+ afr_transaction_type type,
+ unsigned char *locked_on,
+ struct afr_reply *replies)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int *input_dirty = NULL;
+ int **input_matrix = NULL;
+ int *output_dirty = NULL;
+ int **output_matrix = NULL;
+ dict_t *xattr = NULL;
+ dict_t *xdata = NULL;
+ int i = 0;
+
+ priv = this->private;
+ local = frame->local;
+
+ if (!dict_get_sizen(local->xdata_req, "fav-child-policy"))
+ return 0;
+
+ xdata = dict_new();
+ if (!xdata)
+ return -1;
+
+ input_dirty = alloca0(priv->child_count * sizeof(int));
+ input_matrix = ALLOC_MATRIX(priv->child_count, int);
+ output_dirty = alloca0(priv->child_count * sizeof(int));
+ output_matrix = ALLOC_MATRIX(priv->child_count, int);
+
+ afr_selfheal_extract_xattr(this, replies, type, input_dirty, input_matrix);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (i == source || !healed_sinks[i])
+ continue;
+ output_dirty[i] = -input_dirty[i];
+ output_matrix[i][source] = -input_matrix[i][source];
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!healed_sinks[i] || !locked_on[i])
+ continue;
+ xattr = afr_selfheal_output_xattr(this, _gf_false, type, output_dirty,
+ output_matrix, i, NULL);
+
+ afr_selfheal_post_op(frame, this, inode, i, xattr, xdata);
+
+ undid_pending[i] = 1;
+ dict_unref(xattr);
+ }
+
+ if (xdata)
+ dict_unref(xdata);
+
+ return 0;
}
gf_boolean_t
-afr_does_witness_exist (xlator_t *this, uint64_t *witness)
+afr_does_witness_exist(xlator_t *this, uint64_t *witness)
{
- int i = 0;
- afr_private_t *priv = NULL;
+ int i = 0;
+ afr_private_t *priv = NULL;
- priv = this->private;
+ priv = this->private;
- for (i = 0; i < priv->child_count; i++) {
- if (witness[i])
- return _gf_true;
+ for (i = 0; i < priv->child_count; i++) {
+ if (witness[i])
+ return _gf_true;
+ }
+ return _gf_false;
+}
+
+unsigned int
+afr_get_quorum_count(afr_private_t *priv)
+{
+ if (priv->quorum_count == AFR_QUORUM_AUTO) {
+ return priv->child_count / 2 + 1;
+ } else {
+ return priv->quorum_count;
+ }
+}
+
+void
+afr_selfheal_post_op_failure_accounting(afr_private_t *priv, char *accused,
+ unsigned char *sources,
+ unsigned char *locked_on)
+{
+ int i = 0;
+ unsigned int quorum_count = 0;
+
+ if (AFR_COUNT(sources, priv->child_count) != 0)
+ return;
+
+ quorum_count = afr_get_quorum_count(priv);
+ for (i = 0; i < priv->child_count; i++) {
+ if ((accused[i] < quorum_count) && locked_on[i]) {
+ sources[i] = 1;
}
- return _gf_false;
+ }
+ return;
}
/*
@@ -505,566 +1561,713 @@ afr_does_witness_exist (xlator_t *this, uint64_t *witness)
*/
int
-afr_selfheal_find_direction (call_frame_t *frame, xlator_t *this,
- struct afr_reply *replies,
- afr_transaction_type type,
- unsigned char *locked_on, unsigned char *sources,
- unsigned char *sinks, uint64_t *witness)
-{
- afr_private_t *priv = NULL;
- int i = 0;
- int j = 0;
- int *dirty = NULL; /* Denotes if dirty xattr is set */
- int **matrix = NULL;/* Changelog matrix */
- char *accused = NULL;/* Accused others without any self-accusal */
- char *pending = NULL;/* Have pending operations on others */
- char *self_accused = NULL; /* Accused itself */
-
- priv = this->private;
-
- dirty = alloca0 (priv->child_count * sizeof (int));
- accused = alloca0 (priv->child_count);
- pending = alloca0 (priv->child_count);
- self_accused = alloca0 (priv->child_count);
- matrix = ALLOC_MATRIX(priv->child_count, int);
- memset (witness, 0, sizeof (*witness) * priv->child_count);
-
- if (afr_success_count (replies,
- priv->child_count) < AFR_SH_MIN_PARTICIPANTS) {
- /* Treat this just like locks not being acquired */
- return -ENOTCONN;
- }
-
- /* First construct the pending matrix for further analysis */
- afr_selfheal_extract_xattr (this, replies, type, dirty, matrix);
-
- /* short list all self-accused */
+afr_selfheal_find_direction(call_frame_t *frame, xlator_t *this,
+ struct afr_reply *replies,
+ afr_transaction_type type, unsigned char *locked_on,
+ unsigned char *sources, unsigned char *sinks,
+ uint64_t *witness, unsigned char *pflag)
+{
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int j = 0;
+ int *dirty = NULL; /* Denotes if dirty xattr is set */
+ int **matrix = NULL; /* Changelog matrix */
+ char *accused = NULL; /* Accused others without any self-accusal */
+ char *pending = NULL; /* Have pending operations on others */
+ char *self_accused = NULL; /* Accused itself */
+
+ priv = this->private;
+
+ dirty = alloca0(priv->child_count * sizeof(int));
+ accused = alloca0(priv->child_count);
+ pending = alloca0(priv->child_count);
+ self_accused = alloca0(priv->child_count);
+ matrix = ALLOC_MATRIX(priv->child_count, int);
+ memset(witness, 0, sizeof(*witness) * priv->child_count);
+
+ /* First construct the pending matrix for further analysis */
+ afr_selfheal_extract_xattr(this, replies, type, dirty, matrix);
+
+ if (pflag) {
for (i = 0; i < priv->child_count; i++) {
- if (matrix[i][i])
- self_accused[i] = 1;
+ for (j = 0; j < priv->child_count; j++)
+ if (matrix[i][j])
+ *pflag |= PFLAG_PENDING;
+ if (*pflag)
+ break;
}
+ }
+
+ if (afr_success_count(replies, priv->child_count) < priv->child_count) {
+ /* Treat this just like locks not being acquired */
+ return -ENOTCONN;
+ }
+
+ /* short list all self-accused */
+ for (i = 0; i < priv->child_count; i++) {
+ if (matrix[i][i])
+ self_accused[i] = 1;
+ }
+
+ /* Next short list all accused to exclude them from being sources */
+ /* Self-accused can't accuse others as they are FOOLs */
+ for (i = 0; i < priv->child_count; i++) {
+ for (j = 0; j < priv->child_count; j++) {
+ if (matrix[i][j]) {
+ if (!self_accused[i])
+ accused[j] += 1;
+ if (i != j)
+ pending[i] += 1;
+ }
+ }
+ }
- /* Next short list all accused to exclude them from being sources */
- /* Self-accused can't accuse others as they are FOOLs */
- for (i = 0; i < priv->child_count; i++) {
- for (j = 0; j < priv->child_count; j++) {
- if (matrix[i][j]) {
- if (!self_accused[i])
- accused[j] = 1;
-
- if (i != j)
- pending[i] = 1;
- }
- }
- }
-
- /* Short list all non-accused as sources */
- memset (sources, 0, priv->child_count);
- for (i = 0; i < priv->child_count; i++) {
- if (!accused[i] && locked_on[i])
- sources[i] = 1;
- }
-
- /* Everyone accused by non-self-accused sources are sinks */
- memset (sinks, 0, priv->child_count);
- for (i = 0; i < priv->child_count; i++) {
- if (!sources[i])
- continue;
- if (self_accused[i])
- continue;
- for (j = 0; j < priv->child_count; j++) {
- if (matrix[i][j])
- sinks[j] = 1;
- }
+ /* Short list all non-accused as sources */
+ for (i = 0; i < priv->child_count; i++) {
+ if (!accused[i] && locked_on[i])
+ sources[i] = 1;
+ else
+ sources[i] = 0;
+ }
+
+ /* Everyone accused by non-self-accused sources are sinks */
+ memset(sinks, 0, priv->child_count);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i])
+ continue;
+ if (self_accused[i])
+ continue;
+ for (j = 0; j < priv->child_count; j++) {
+ if (matrix[i][j])
+ sinks[j] = 1;
+ }
+ }
+
+ /* For breaking ties provide with number of fops they witnessed */
+
+ /*
+ * count the pending fops witnessed from itself to others when it is
+ * self-accused
+ */
+ for (i = 0; i < priv->child_count; i++) {
+ if (!self_accused[i])
+ continue;
+ for (j = 0; j < priv->child_count; j++) {
+ if (i == j)
+ continue;
+ witness[i] += matrix[i][j];
}
+ }
- /* For breaking ties provide with number of fops they witnessed */
+ if (type == AFR_DATA_TRANSACTION || type == AFR_METADATA_TRANSACTION)
+ afr_selfheal_post_op_failure_accounting(priv, accused, sources,
+ locked_on);
- /*
- * count the pending fops witnessed from itself to others when it is
- * self-accused
- */
+ /* If no sources, all locked nodes are sinks - split brain */
+ if (AFR_COUNT(sources, priv->child_count) == 0) {
for (i = 0; i < priv->child_count; i++) {
- if (!self_accused[i])
- continue;
- for (j = 0; j < priv->child_count; j++) {
- if (i == j)
- continue;
- witness[i] += matrix[i][j];
- }
+ if (locked_on[i])
+ sinks[i] = 1;
}
-
- /* If no sources, all locked nodes are sinks - split brain */
- if (AFR_COUNT (sources, priv->child_count) == 0) {
- for (i = 0; i < priv->child_count; i++) {
- if (locked_on[i])
- sinks[i] = 1;
- }
+ if (pflag)
+ *pflag |= PFLAG_SBRAIN;
+ }
+
+ /* One more class of witness similar to dirty in v2 is where no pending
+ * exists but we have self-accusing markers. This can happen in afr-v1
+ * if the brick crashes just after doing xattrop on self but
+ * before xattrop on the other xattrs on the brick in pre-op. */
+ if (AFR_COUNT(pending, priv->child_count) == 0) {
+ for (i = 0; i < priv->child_count; i++) {
+ if (self_accused[i])
+ witness[i] += matrix[i][i];
}
-
- /* In afr-v1 if a file is self-accused but didn't have any pending
+ } else {
+ /* In afr-v1 if a file is self-accused and has pending
* operations on others then it is similar to 'dirty' in afr-v2.
* Consider such cases as witness.
*/
for (i = 0; i < priv->child_count; i++) {
- if (self_accused[i] && !pending[i])
- witness[i] += matrix[i][i];
+ if (self_accused[i] && pending[i])
+ witness[i] += matrix[i][i];
}
+ }
- /* count the number of dirty fops witnessed */
- for (i = 0; i < priv->child_count; i++)
- witness[i] += dirty[i];
+ /* count the number of dirty fops witnessed */
+ for (i = 0; i < priv->child_count; i++)
+ witness[i] += dirty[i];
- return 0;
+ return 0;
}
void
-afr_log_selfheal (uuid_t gfid, xlator_t *this, int ret, char *type,
- int source, unsigned char *healed_sinks)
-{
- char *status = NULL;
- char *sinks_str = NULL;
- char *p = NULL;
- afr_private_t *priv = NULL;
- gf_loglevel_t loglevel = GF_LOG_NONE;
- int i = 0;
-
- priv = this->private;
- sinks_str = alloca0 (priv->child_count * 8);
- p = sinks_str;
- for (i = 0; i < priv->child_count; i++) {
- if (!healed_sinks[i])
- continue;
- p += sprintf (p, "%d ", i);
- }
-
- if (ret < 0) {
- status = "Failed";
- loglevel = GF_LOG_DEBUG;
- } else {
- status = "Completed";
- loglevel = GF_LOG_INFO;
+afr_log_selfheal(uuid_t gfid, xlator_t *this, int ret, char *type, int source,
+ unsigned char *sources, unsigned char *healed_sinks)
+{
+ char *status = NULL;
+ char *sinks_str = NULL;
+ char *p = NULL;
+ char *sources_str = NULL;
+ char *q = NULL;
+ afr_private_t *priv = NULL;
+ gf_loglevel_t loglevel = GF_LOG_NONE;
+ int i = 0;
+
+ priv = this->private;
+ sinks_str = alloca0(priv->child_count * 8);
+ p = sinks_str;
+ sources_str = alloca0(priv->child_count * 8);
+ q = sources_str;
+ for (i = 0; i < priv->child_count; i++) {
+ if (healed_sinks[i])
+ p += sprintf(p, "%d ", i);
+ if (sources[i]) {
+ if (source == i) {
+ q += sprintf(q, "[%d] ", i);
+ } else {
+ q += sprintf(q, "%d ", i);
+ }
}
-
- gf_log (this->name, loglevel, "%s %s selfheal on %s. "
- "source=%d sinks=%s", status, type, uuid_utoa (gfid),
- source, sinks_str);
+ }
+
+ if (ret < 0) {
+ status = "Failed";
+ loglevel = GF_LOG_DEBUG;
+ } else {
+ status = "Completed";
+ loglevel = GF_LOG_INFO;
+ }
+
+ gf_msg(this->name, loglevel, 0, AFR_MSG_SELF_HEAL_INFO,
+ "%s %s selfheal on %s. "
+ "sources=%s sinks=%s",
+ status, type, uuid_utoa(gfid), sources_str, sinks_str);
}
int
-afr_selfheal_discover_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, inode_t *inode,
- struct iatt *buf, dict_t *xdata, struct iatt *parbuf)
+afr_selfheal_discover_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata, struct iatt *parbuf)
{
- afr_local_t *local = NULL;
- int i = -1;
+ afr_local_t *local = NULL;
+ int i = -1;
+ GF_UNUSED int ret = -1;
+ int8_t need_heal = 1;
+
+ local = frame->local;
+ i = (long)cookie;
+
+ local->replies[i].valid = 1;
+ local->replies[i].op_ret = op_ret;
+ local->replies[i].op_errno = op_errno;
+ if (buf)
+ local->replies[i].poststat = *buf;
+ if (parbuf)
+ local->replies[i].postparent = *parbuf;
+ if (xdata) {
+ local->replies[i].xdata = dict_ref(xdata);
+ ret = dict_get_int8(xdata, "link-count", &need_heal);
+ }
+
+ local->replies[i].need_heal = need_heal;
+ syncbarrier_wake(&local->barrier);
+
+ return 0;
+}
- local = frame->local;
- i = (long) cookie;
+inode_t *
+afr_selfheal_unlocked_lookup_on(call_frame_t *frame, inode_t *parent,
+ const char *name, struct afr_reply *replies,
+ unsigned char *lookup_on, dict_t *xattr)
+{
+ loc_t loc = {
+ 0,
+ };
+ dict_t *xattr_req = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ inode_t *inode = NULL;
- local->replies[i].valid = 1;
- local->replies[i].op_ret = op_ret;
- local->replies[i].op_errno = op_errno;
- if (buf)
- local->replies[i].poststat = *buf;
- if (parbuf)
- local->replies[i].postparent = *parbuf;
- if (xdata)
- local->replies[i].xdata = dict_ref (xdata);
+ local = frame->local;
+ priv = frame->this->private;
- syncbarrier_wake (&local->barrier);
+ xattr_req = dict_new();
+ if (!xattr_req)
+ return NULL;
- return 0;
-}
+ if (xattr)
+ dict_copy(xattr, xattr_req);
+ if (afr_xattr_req_prepare(frame->this, xattr_req) != 0) {
+ dict_unref(xattr_req);
+ return NULL;
+ }
-inode_t *
-afr_selfheal_unlocked_lookup_on (call_frame_t *frame, inode_t *parent,
- const char *name, struct afr_reply *replies,
- unsigned char *lookup_on, dict_t *xattr)
-{
- loc_t loc = {0, };
- dict_t *xattr_req = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- inode_t *inode = NULL;
+ inode = inode_new(parent->table);
+ if (!inode) {
+ dict_unref(xattr_req);
+ return NULL;
+ }
- local = frame->local;
- priv = frame->this->private;
+ loc.parent = inode_ref(parent);
+ gf_uuid_copy(loc.pargfid, parent->gfid);
+ loc.name = name;
+ loc.inode = inode_ref(inode);
- xattr_req = dict_new ();
- if (!xattr_req)
- return NULL;
+ AFR_ONLIST(lookup_on, frame, afr_selfheal_discover_cbk, lookup, &loc,
+ xattr_req);
- if (xattr)
- dict_copy (xattr, xattr_req);
+ afr_replies_copy(replies, local->replies, priv->child_count);
- if (afr_xattr_req_prepare (frame->this, xattr_req) != 0) {
- dict_destroy (xattr_req);
- return NULL;
- }
+ loc_wipe(&loc);
+ dict_unref(xattr_req);
- inode = inode_new (parent->table);
- if (!inode) {
- dict_destroy (xattr_req);
- return NULL;
- }
-
- loc.parent = inode_ref (parent);
- gf_uuid_copy (loc.pargfid, parent->gfid);
- loc.name = name;
- loc.inode = inode_ref (inode);
+ return inode;
+}
- AFR_ONLIST (lookup_on, frame, afr_selfheal_discover_cbk, lookup, &loc,
- xattr_req);
+static int
+afr_set_multi_dom_lock_count_request(xlator_t *this, dict_t *dict)
+{
+ int ret = 0;
+ afr_private_t *priv = NULL;
+ char *key1 = NULL;
+ char *key2 = NULL;
+
+ priv = this->private;
+ key1 = alloca0(strlen(GLUSTERFS_INODELK_DOM_PREFIX) + 2 +
+ strlen(this->name));
+ key2 = alloca0(strlen(GLUSTERFS_INODELK_DOM_PREFIX) + 2 +
+ strlen(priv->sh_domain));
+
+ ret = dict_set_uint32(dict, GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS, 1);
+ if (ret)
+ return ret;
- afr_replies_copy (replies, local->replies, priv->child_count);
+ sprintf(key1, "%s:%s", GLUSTERFS_INODELK_DOM_PREFIX, this->name);
+ ret = dict_set_uint32(dict, key1, 1);
+ if (ret)
+ return ret;
- loc_wipe (&loc);
- dict_unref (xattr_req);
+ sprintf(key2, "%s:%s", GLUSTERFS_INODELK_DOM_PREFIX, priv->sh_domain);
+ ret = dict_set_uint32(dict, key2, 1);
+ if (ret)
+ return ret;
- return inode;
+ return 0;
}
int
-afr_selfheal_unlocked_discover_on (call_frame_t *frame, inode_t *inode,
- uuid_t gfid, struct afr_reply *replies,
- unsigned char *discover_on)
+afr_selfheal_unlocked_discover_on(call_frame_t *frame, inode_t *inode,
+ uuid_t gfid, struct afr_reply *replies,
+ unsigned char *discover_on, dict_t *dict)
{
- loc_t loc = {0, };
- dict_t *xattr_req = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
+ loc_t loc = {
+ 0,
+ };
+ dict_t *xattr_req = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- local = frame->local;
- priv = frame->this->private;
+ local = frame->local;
+ priv = frame->this->private;
- xattr_req = dict_new ();
- if (!xattr_req)
- return -ENOMEM;
+ xattr_req = dict_new();
+ if (!xattr_req)
+ return -ENOMEM;
+ if (dict)
+ dict_copy(dict, xattr_req);
- if (afr_xattr_req_prepare (frame->this, xattr_req) != 0) {
- dict_destroy (xattr_req);
- return -ENOMEM;
- }
+ if (afr_xattr_req_prepare(frame->this, xattr_req) != 0) {
+ dict_unref(xattr_req);
+ return -ENOMEM;
+ }
- loc.inode = inode_ref (inode);
- gf_uuid_copy (loc.gfid, gfid);
+ if (afr_set_multi_dom_lock_count_request(frame->this, xattr_req)) {
+ dict_unref(xattr_req);
+ return -1;
+ }
- AFR_ONLIST (discover_on, frame, afr_selfheal_discover_cbk, lookup, &loc,
- xattr_req);
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, gfid);
- afr_replies_copy (replies, local->replies, priv->child_count);
+ AFR_ONLIST(discover_on, frame, afr_selfheal_discover_cbk, lookup, &loc,
+ xattr_req);
- loc_wipe (&loc);
- dict_unref (xattr_req);
+ afr_replies_copy(replies, local->replies, priv->child_count);
- return 0;
+ loc_wipe(&loc);
+ dict_unref(xattr_req);
+
+ return 0;
}
int
-afr_selfheal_unlocked_discover (call_frame_t *frame, inode_t *inode,
- uuid_t gfid, struct afr_reply *replies)
+afr_selfheal_unlocked_discover(call_frame_t *frame, inode_t *inode, uuid_t gfid,
+ struct afr_reply *replies)
{
- afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ dict_t *dict = NULL;
+
+ local = frame->local;
- priv = frame->this->private;
+ if (local->xattr_req)
+ dict = local->xattr_req;
- return afr_selfheal_unlocked_discover_on (frame, inode, gfid, replies,
- priv->child_up);
+ return afr_selfheal_unlocked_discover_on(frame, inode, gfid, replies,
+ local->child_up, dict);
}
unsigned int
-afr_success_count (struct afr_reply *replies, unsigned int count)
+afr_success_count(struct afr_reply *replies, unsigned int count)
{
- int i = 0;
- unsigned int success = 0;
+ int i = 0;
+ unsigned int success = 0;
- for (i = 0; i < count; i++)
- if (replies[i].valid && replies[i].op_ret == 0)
- success++;
- return success;
+ for (i = 0; i < count; i++)
+ if (replies[i].valid && replies[i].op_ret == 0)
+ success++;
+ return success;
}
int
-afr_selfheal_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xdata)
+afr_selfheal_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
{
- afr_local_t *local = NULL;
- int i = 0;
+ afr_local_t *local = NULL;
+ int i = 0;
- local = frame->local;
- i = (long) cookie;
+ local = frame->local;
+ i = (long)cookie;
- local->replies[i].valid = 1;
- local->replies[i].op_ret = op_ret;
- local->replies[i].op_errno = op_errno;
+ local->replies[i].valid = 1;
+ local->replies[i].op_ret = op_ret;
+ local->replies[i].op_errno = op_errno;
- syncbarrier_wake (&local->barrier);
+ syncbarrier_wake(&local->barrier);
- return 0;
+ return 0;
}
-
int
-afr_selfheal_locked_fill (call_frame_t *frame, xlator_t *this,
- unsigned char *locked_on)
+afr_locked_fill(call_frame_t *frame, xlator_t *this, unsigned char *locked_on)
{
- int i = 0;
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int count = 0;
-
- local = frame->local;
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->replies[i].valid && local->replies[i].op_ret == 0) {
- locked_on[i] = 1;
- count++;
- } else {
- locked_on[i] = 0;
- }
- }
+ int i = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int count = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].valid && local->replies[i].op_ret == 0) {
+ locked_on[i] = 1;
+ count++;
+ } else {
+ locked_on[i] = 0;
+ }
+ }
- return count;
+ return count;
}
-
int
-afr_selfheal_tryinodelk (call_frame_t *frame, xlator_t *this, inode_t *inode,
- char *dom, off_t off, size_t size,
- unsigned char *locked_on)
+afr_selfheal_tryinodelk(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, off_t off, size_t size,
+ unsigned char *locked_on)
{
- loc_t loc = {0,};
- struct gf_flock flock = {0, };
+ loc_t loc = {
+ 0,
+ };
+ struct gf_flock flock = {
+ 0,
+ };
- loc.inode = inode_ref (inode);
- gf_uuid_copy (loc.gfid, inode->gfid);
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
- flock.l_type = F_WRLCK;
- flock.l_start = off;
- flock.l_len = size;
+ flock.l_type = F_WRLCK;
+ flock.l_start = off;
+ flock.l_len = size;
- AFR_ONALL (frame, afr_selfheal_lock_cbk, inodelk, dom,
- &loc, F_SETLK, &flock, NULL);
+ AFR_ONALL(frame, afr_selfheal_lock_cbk, inodelk, dom, &loc, F_SETLK, &flock,
+ NULL);
- loc_wipe (&loc);
+ loc_wipe(&loc);
- return afr_selfheal_locked_fill (frame, this, locked_on);
+ return afr_locked_fill(frame, this, locked_on);
}
-
int
-afr_selfheal_inodelk (call_frame_t *frame, xlator_t *this, inode_t *inode,
- char *dom, off_t off, size_t size,
- unsigned char *locked_on)
+afr_selfheal_inodelk(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, off_t off, size_t size,
+ unsigned char *locked_on)
{
- loc_t loc = {0,};
- struct gf_flock flock = {0, };
- afr_local_t *local = NULL;
- int i = 0;
- afr_private_t *priv = NULL;
-
- priv = this->private;
- local = frame->local;
-
- loc.inode = inode_ref (inode);
- gf_uuid_copy (loc.gfid, inode->gfid);
-
- flock.l_type = F_WRLCK;
- flock.l_start = off;
- flock.l_len = size;
-
- AFR_ONALL (frame, afr_selfheal_lock_cbk, inodelk, dom,
- &loc, F_SETLK, &flock, NULL);
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->replies[i].op_ret == -1 &&
- local->replies[i].op_errno == EAGAIN) {
- afr_selfheal_locked_fill (frame, this, locked_on);
- afr_selfheal_uninodelk (frame, this, inode, dom, off,
- size, locked_on);
-
- AFR_SEQ (frame, afr_selfheal_lock_cbk, inodelk, dom,
- &loc, F_SETLKW, &flock, NULL);
- break;
- }
- }
+ loc_t loc = {
+ 0,
+ };
+ struct gf_flock flock = {
+ 0,
+ };
+ afr_local_t *local = NULL;
+ int i = 0;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+
+ flock.l_type = F_WRLCK;
+ flock.l_start = off;
+ flock.l_len = size;
+
+ AFR_ONALL(frame, afr_selfheal_lock_cbk, inodelk, dom, &loc, F_SETLK, &flock,
+ NULL);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].op_ret == -1 &&
+ local->replies[i].op_errno == EAGAIN) {
+ afr_locked_fill(frame, this, locked_on);
+ afr_selfheal_uninodelk(frame, this, inode, dom, off, size,
+ locked_on);
+
+ AFR_SEQ(frame, afr_selfheal_lock_cbk, inodelk, dom, &loc, F_SETLKW,
+ &flock, NULL);
+ break;
+ }
+ }
- loc_wipe (&loc);
+ loc_wipe(&loc);
- return afr_selfheal_locked_fill (frame, this, locked_on);
+ return afr_locked_fill(frame, this, locked_on);
}
+static void
+afr_get_lock_and_eagain_counts(afr_private_t *priv, struct afr_reply *replies,
+ int *lock_count, int *eagain_count)
+{
+ int i = 0;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
+ if (replies[i].op_ret == 0) {
+ (*lock_count)++;
+ } else if (replies[i].op_ret == -1 && replies[i].op_errno == EAGAIN) {
+ (*eagain_count)++;
+ }
+ }
+}
+/*Do blocking locks if number of locks acquired is majority and there were some
+ * EAGAINs. Useful for odd-way replication*/
int
-afr_selfheal_uninodelk (call_frame_t *frame, xlator_t *this, inode_t *inode,
- char *dom, off_t off, size_t size,
- const unsigned char *locked_on)
+afr_selfheal_tie_breaker_inodelk(call_frame_t *frame, xlator_t *this,
+ inode_t *inode, char *dom, off_t off,
+ size_t size, unsigned char *locked_on)
{
- loc_t loc = {0,};
- struct gf_flock flock = {0, };
+ loc_t loc = {
+ 0,
+ };
+ struct gf_flock flock = {
+ 0,
+ };
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int lock_count = 0;
+ int eagain_count = 0;
+ priv = this->private;
+ local = frame->local;
- loc.inode = inode_ref (inode);
- gf_uuid_copy (loc.gfid, inode->gfid);
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
- flock.l_type = F_UNLCK;
- flock.l_start = off;
- flock.l_len = size;
+ flock.l_type = F_WRLCK;
+ flock.l_start = off;
+ flock.l_len = size;
- AFR_ONLIST (locked_on, frame, afr_selfheal_lock_cbk, inodelk,
- dom, &loc, F_SETLK, &flock, NULL);
+ AFR_ONALL(frame, afr_selfheal_lock_cbk, inodelk, dom, &loc, F_SETLK, &flock,
+ NULL);
- loc_wipe (&loc);
+ afr_get_lock_and_eagain_counts(priv, local->replies, &lock_count,
+ &eagain_count);
- return 0;
-}
+ if (lock_count > priv->child_count / 2 && eagain_count) {
+ afr_locked_fill(frame, this, locked_on);
+ afr_selfheal_uninodelk(frame, this, inode, dom, off, size, locked_on);
+
+ AFR_SEQ(frame, afr_selfheal_lock_cbk, inodelk, dom, &loc, F_SETLKW,
+ &flock, NULL);
+ }
+ loc_wipe(&loc);
+
+ return afr_locked_fill(frame, this, locked_on);
+}
int
-afr_selfheal_tryentrylk (call_frame_t *frame, xlator_t *this, inode_t *inode,
- char *dom, const char *name, unsigned char *locked_on)
+afr_selfheal_uninodelk(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, off_t off, size_t size,
+ const unsigned char *locked_on)
{
- loc_t loc = {0,};
+ loc_t loc = {
+ 0,
+ };
+ struct gf_flock flock = {
+ 0,
+ };
- loc.inode = inode_ref (inode);
- gf_uuid_copy (loc.gfid, inode->gfid);
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
- AFR_ONALL (frame, afr_selfheal_lock_cbk, entrylk, dom,
- &loc, name, ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL);
+ flock.l_type = F_UNLCK;
+ flock.l_start = off;
+ flock.l_len = size;
- loc_wipe (&loc);
+ AFR_ONLIST(locked_on, frame, afr_selfheal_lock_cbk, inodelk, dom, &loc,
+ F_SETLK, &flock, NULL);
- return afr_selfheal_locked_fill (frame, this, locked_on);
-}
+ loc_wipe(&loc);
+ return 0;
+}
int
-afr_selfheal_entrylk (call_frame_t *frame, xlator_t *this, inode_t *inode,
- char *dom, const char *name, unsigned char *locked_on)
+afr_selfheal_tryentrylk(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, const char *name, unsigned char *locked_on)
{
- loc_t loc = {0,};
- afr_local_t *local = NULL;
- int i = 0;
- afr_private_t *priv = NULL;
+ loc_t loc = {
+ 0,
+ };
- priv = this->private;
- local = frame->local;
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
- loc.inode = inode_ref (inode);
- gf_uuid_copy (loc.gfid, inode->gfid);
+ AFR_ONALL(frame, afr_selfheal_lock_cbk, entrylk, dom, &loc, name,
+ ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL);
- AFR_ONALL (frame, afr_selfheal_lock_cbk, entrylk, dom, &loc,
- name, ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL);
+ loc_wipe(&loc);
- for (i = 0; i < priv->child_count; i++) {
- if (local->replies[i].op_ret == -1 &&
- local->replies[i].op_errno == EAGAIN) {
- afr_selfheal_locked_fill (frame, this, locked_on);
- afr_selfheal_unentrylk (frame, this, inode, dom, name,
- locked_on);
+ return afr_locked_fill(frame, this, locked_on);
+}
- AFR_SEQ (frame, afr_selfheal_lock_cbk, entrylk, dom,
- &loc, name, ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL);
- break;
- }
- }
+int
+afr_selfheal_entrylk(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, const char *name, unsigned char *locked_on)
+{
+ loc_t loc = {
+ 0,
+ };
+ afr_local_t *local = NULL;
+ int i = 0;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+
+ AFR_ONALL(frame, afr_selfheal_lock_cbk, entrylk, dom, &loc, name,
+ ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].op_ret == -1 &&
+ local->replies[i].op_errno == EAGAIN) {
+ afr_locked_fill(frame, this, locked_on);
+ afr_selfheal_unentrylk(frame, this, inode, dom, name, locked_on,
+ NULL);
+
+ AFR_SEQ(frame, afr_selfheal_lock_cbk, entrylk, dom, &loc, name,
+ ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL);
+ break;
+ }
+ }
- loc_wipe (&loc);
+ loc_wipe(&loc);
- return afr_selfheal_locked_fill (frame, this, locked_on);
+ return afr_locked_fill(frame, this, locked_on);
}
-
int
-afr_selfheal_unentrylk (call_frame_t *frame, xlator_t *this, inode_t *inode,
- char *dom, const char *name, unsigned char *locked_on)
+afr_selfheal_tie_breaker_entrylk(call_frame_t *frame, xlator_t *this,
+ inode_t *inode, char *dom, const char *name,
+ unsigned char *locked_on)
{
- loc_t loc = {0,};
+ loc_t loc = {
+ 0,
+ };
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int lock_count = 0;
+ int eagain_count = 0;
- loc.inode = inode_ref (inode);
- gf_uuid_copy (loc.gfid, inode->gfid);
+ priv = this->private;
+ local = frame->local;
- AFR_ONLIST (locked_on, frame, afr_selfheal_lock_cbk, entrylk,
- dom, &loc, name, ENTRYLK_UNLOCK, ENTRYLK_WRLCK, NULL);
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
- loc_wipe (&loc);
-
- return 0;
-}
+ AFR_ONALL(frame, afr_selfheal_lock_cbk, entrylk, dom, &loc, name,
+ ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL);
+ afr_get_lock_and_eagain_counts(priv, local->replies, &lock_count,
+ &eagain_count);
-gf_boolean_t
-afr_is_pending_set (xlator_t *this, dict_t *xdata, int type)
-{
- int idx = -1;
- afr_private_t *priv = NULL;
- void *pending_raw = NULL;
- int *pending_int = NULL;
- int i = 0;
+ if (lock_count > priv->child_count / 2 && eagain_count) {
+ afr_locked_fill(frame, this, locked_on);
+ afr_selfheal_unentrylk(frame, this, inode, dom, name, locked_on, NULL);
- priv = this->private;
- idx = afr_index_for_transaction_type (type);
+ AFR_SEQ(frame, afr_selfheal_lock_cbk, entrylk, dom, &loc, name,
+ ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL);
+ }
- if (dict_get_ptr (xdata, AFR_DIRTY, &pending_raw) == 0) {
- if (pending_raw) {
- pending_int = pending_raw;
+ loc_wipe(&loc);
- if (ntoh32 (pending_int[idx]))
- return _gf_true;
- }
- }
+ return afr_locked_fill(frame, this, locked_on);
+}
- for (i = 0; i < priv->child_count; i++) {
- if (dict_get_ptr (xdata, priv->pending_key[i],
- &pending_raw))
- continue;
- if (!pending_raw)
- continue;
- pending_int = pending_raw;
+int
+afr_selfheal_unentrylk(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, const char *name, unsigned char *locked_on,
+ dict_t *xdata)
+{
+ loc_t loc = {
+ 0,
+ };
- if (ntoh32 (pending_int[idx]))
- return _gf_true;
- }
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
- return _gf_false;
-}
+ AFR_ONLIST(locked_on, frame, afr_selfheal_lock_cbk, entrylk, dom, &loc,
+ name, ENTRYLK_UNLOCK, ENTRYLK_WRLCK, xdata);
+ loc_wipe(&loc);
-gf_boolean_t
-afr_is_data_set (xlator_t *this, dict_t *xdata)
-{
- return afr_is_pending_set (this, xdata, AFR_DATA_TRANSACTION);
+ return 0;
}
gf_boolean_t
-afr_is_metadata_set (xlator_t *this, dict_t *xdata)
+afr_is_data_set(xlator_t *this, dict_t *xdata)
{
- return afr_is_pending_set (this, xdata, AFR_METADATA_TRANSACTION);
+ return afr_is_pending_set(this, xdata, AFR_DATA_TRANSACTION);
}
gf_boolean_t
-afr_is_entry_set (xlator_t *this, dict_t *xdata)
+afr_is_metadata_set(xlator_t *this, dict_t *xdata)
{
- return afr_is_pending_set (this, xdata, AFR_ENTRY_TRANSACTION);
+ return afr_is_pending_set(this, xdata, AFR_METADATA_TRANSACTION);
}
-
-inode_t*
-afr_inode_link (inode_t *inode, struct iatt *iatt)
+gf_boolean_t
+afr_is_entry_set(xlator_t *this, dict_t *xdata)
{
- inode_t *linked_inode = NULL;
-
- linked_inode = inode_link (inode, NULL, NULL, iatt);
-
- if (linked_inode)
- inode_lookup (linked_inode);
- return linked_inode;
+ return afr_is_pending_set(this, xdata, AFR_ENTRY_TRANSACTION);
}
-
/*
* This function inspects the looked up replies (in an unlocked manner)
* and decides whether a locked verification and possible healing is
@@ -1075,278 +2278,310 @@ afr_inode_link (inode_t *inode, struct iatt *iatt)
*/
int
-afr_selfheal_unlocked_inspect (call_frame_t *frame, xlator_t *this,
- uuid_t gfid, inode_t **link_inode,
- gf_boolean_t *data_selfheal,
- gf_boolean_t *metadata_selfheal,
- gf_boolean_t *entry_selfheal)
-{
- afr_private_t *priv = NULL;
- inode_t *inode = NULL;
- int i = 0;
- int valid_cnt = 0;
- struct iatt first = {0, };
- struct afr_reply *replies = NULL;
- int ret = -1;
-
- priv = this->private;
-
- inode = afr_inode_find (this, gfid);
- if (!inode)
- goto out;
+afr_selfheal_unlocked_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid,
+ inode_t **link_inode, gf_boolean_t *data_selfheal,
+ gf_boolean_t *metadata_selfheal,
+ gf_boolean_t *entry_selfheal,
+ struct afr_reply *replies_dst)
+{
+ afr_private_t *priv = NULL;
+ inode_t *inode = NULL;
+ int i = 0;
+ int valid_cnt = 0;
+ struct iatt first = {
+ 0,
+ };
+ int first_idx = 0;
+ struct afr_reply *replies = NULL;
+ int ret = -1;
+
+ priv = this->private;
+
+ inode = afr_inode_find(this, gfid);
+ if (!inode)
+ goto out;
+
+ replies = alloca0(sizeof(*replies) * priv->child_count);
+
+ ret = afr_selfheal_unlocked_discover(frame, inode, gfid, replies);
+ if (ret)
+ goto out;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
+ if (replies[i].op_ret == -1)
+ continue;
+
+ /* The data segment of the changelog can be non-zero to indicate
+ * the directory needs a full heal. So the check below ensures
+ * it's not a directory before setting the data_selfheal boolean.
+ */
+ if (data_selfheal && !IA_ISDIR(replies[i].poststat.ia_type) &&
+ afr_is_data_set(this, replies[i].xdata))
+ *data_selfheal = _gf_true;
- replies = alloca0 (sizeof (*replies) * priv->child_count);
+ if (metadata_selfheal && afr_is_metadata_set(this, replies[i].xdata))
+ *metadata_selfheal = _gf_true;
- ret = afr_selfheal_unlocked_discover (frame, inode, gfid, replies);
- if (ret)
- goto out;
+ if (entry_selfheal && afr_is_entry_set(this, replies[i].xdata))
+ *entry_selfheal = _gf_true;
- for (i = 0; i < priv->child_count; i++) {
- if (!replies[i].valid)
- continue;
- if (replies[i].op_ret == -1)
- continue;
-
- if (data_selfheal && afr_is_data_set (this, replies[i].xdata))
- *data_selfheal = _gf_true;
-
- if (metadata_selfheal &&
- afr_is_metadata_set (this, replies[i].xdata))
- *metadata_selfheal = _gf_true;
-
- if (entry_selfheal && afr_is_entry_set (this, replies[i].xdata))
- *entry_selfheal = _gf_true;
-
- valid_cnt ++;
- if (valid_cnt == 1) {
- first = replies[i].poststat;
- continue;
- }
-
- if (!IA_EQUAL (first, replies[i].poststat, type)) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- AFR_MSG_SPLIT_BRAIN,
- "TYPE mismatch %d vs %d on %s for gfid:%s",
- (int) first.ia_type,
- (int) replies[i].poststat.ia_type,
- priv->children[i]->name,
- uuid_utoa (replies[i].poststat.ia_gfid));
- ret = -EIO;
- goto out;
- }
-
- if (!IA_EQUAL (first, replies[i].poststat, uid)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "UID mismatch %d vs %d on %s for gfid:%s",
- (int) first.ia_uid,
- (int) replies[i].poststat.ia_uid,
- priv->children[i]->name,
- uuid_utoa (replies[i].poststat.ia_gfid));
-
- if (metadata_selfheal)
- *metadata_selfheal = _gf_true;
- }
-
- if (!IA_EQUAL (first, replies[i].poststat, gid)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "GID mismatch %d vs %d on %s for gfid:%s",
- (int) first.ia_uid,
- (int) replies[i].poststat.ia_uid,
- priv->children[i]->name,
- uuid_utoa (replies[i].poststat.ia_gfid));
-
- if (metadata_selfheal)
- *metadata_selfheal = _gf_true;
- }
-
- if (!IA_EQUAL (first, replies[i].poststat, prot)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "MODE mismatch %d vs %d on %s for gfid:%s",
- (int) st_mode_from_ia (first.ia_prot, 0),
- (int) st_mode_from_ia (replies[i].poststat.ia_prot, 0),
- priv->children[i]->name,
- uuid_utoa (replies[i].poststat.ia_gfid));
-
- if (metadata_selfheal)
- *metadata_selfheal = _gf_true;
- }
-
- if (IA_ISREG(first.ia_type) &&
- !IA_EQUAL (first, replies[i].poststat, size)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "SIZE mismatch %lld vs %lld on %s for gfid:%s",
- (long long) first.ia_size,
- (long long) replies[i].poststat.ia_size,
- priv->children[i]->name,
- uuid_utoa (replies[i].poststat.ia_gfid));
-
- if (data_selfheal)
- *data_selfheal = _gf_true;
- }
- }
-
- if (valid_cnt > 0 && link_inode) {
- *link_inode = afr_inode_link (inode, &first);
- if (!*link_inode) {
- ret = -EINVAL;
- goto out;
- }
- } else if (valid_cnt < 2) {
- ret = -ENOTCONN;
- goto out;
+ valid_cnt++;
+ if (valid_cnt == 1) {
+ first = replies[i].poststat;
+ first_idx = i;
+ continue;
}
- ret = 0;
-out:
- if (inode)
- inode_unref (inode);
- if (replies)
- afr_replies_wipe (replies, priv->child_count);
+ if (!IA_EQUAL(first, replies[i].poststat, type)) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN,
+ "TYPE mismatch %d vs %d on %s for gfid:%s",
+ (int)first.ia_type, (int)replies[i].poststat.ia_type,
+ priv->children[i]->name,
+ uuid_utoa(replies[i].poststat.ia_gfid));
+ gf_event(EVENT_AFR_SPLIT_BRAIN,
+ "client-pid=%d;"
+ "subvol=%s;"
+ "type=file;gfid=%s;"
+ "ia_type-%d=%s;ia_type-%d=%s",
+ this->ctx->cmd_args.client_pid, this->name,
+ uuid_utoa(replies[i].poststat.ia_gfid), first_idx,
+ gf_inode_type_to_str(first.ia_type), i,
+ gf_inode_type_to_str(replies[i].poststat.ia_type));
+ ret = -EIO;
+ goto out;
+ }
- return ret;
-}
+ if (!IA_EQUAL(first, replies[i].poststat, uid)) {
+ gf_msg_debug(this->name, 0,
+ "UID mismatch "
+ "%d vs %d on %s for gfid:%s",
+ (int)first.ia_uid, (int)replies[i].poststat.ia_uid,
+ priv->children[i]->name,
+ uuid_utoa(replies[i].poststat.ia_gfid));
+ if (metadata_selfheal)
+ *metadata_selfheal = _gf_true;
+ }
+
+ if (!IA_EQUAL(first, replies[i].poststat, gid)) {
+ gf_msg_debug(this->name, 0,
+ "GID mismatch "
+ "%d vs %d on %s for gfid:%s",
+ (int)first.ia_uid, (int)replies[i].poststat.ia_uid,
+ priv->children[i]->name,
+ uuid_utoa(replies[i].poststat.ia_gfid));
+
+ if (metadata_selfheal)
+ *metadata_selfheal = _gf_true;
+ }
+
+ if (!IA_EQUAL(first, replies[i].poststat, prot)) {
+ gf_msg_debug(this->name, 0,
+ "MODE mismatch "
+ "%d vs %d on %s for gfid:%s",
+ (int)st_mode_from_ia(first.ia_prot, 0),
+ (int)st_mode_from_ia(replies[i].poststat.ia_prot, 0),
+ priv->children[i]->name,
+ uuid_utoa(replies[i].poststat.ia_gfid));
+
+ if (metadata_selfheal)
+ *metadata_selfheal = _gf_true;
+ }
+
+ if (IA_ISREG(first.ia_type) &&
+ !IA_EQUAL(first, replies[i].poststat, size)) {
+ gf_msg_debug(this->name, 0,
+ "SIZE mismatch "
+ "%lld vs %lld on %s for gfid:%s",
+ (long long)first.ia_size,
+ (long long)replies[i].poststat.ia_size,
+ priv->children[i]->name,
+ uuid_utoa(replies[i].poststat.ia_gfid));
+
+ if (data_selfheal)
+ *data_selfheal = _gf_true;
+ }
+ }
+
+ if (valid_cnt > 0 && link_inode) {
+ *link_inode = inode_link(inode, NULL, NULL, &first);
+ if (!*link_inode) {
+ ret = -EINVAL;
+ goto out;
+ }
+ } else if (valid_cnt < 2) {
+ ret = afr_check_stale_error(replies, priv);
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (replies && replies_dst)
+ afr_replies_copy(replies_dst, replies, priv->child_count);
+ if (inode)
+ inode_unref(inode);
+ if (replies)
+ afr_replies_wipe(replies, priv->child_count);
+
+ return ret;
+}
inode_t *
-afr_inode_find (xlator_t *this, uuid_t gfid)
+afr_inode_find(xlator_t *this, uuid_t gfid)
{
- inode_table_t *table = NULL;
- inode_t *inode = NULL;
+ inode_table_t *table = NULL;
+ inode_t *inode = NULL;
- table = this->itable;
- if (!table)
- return NULL;
+ table = this->itable;
+ if (!table)
+ return NULL;
- inode = inode_find (table, gfid);
- if (inode)
- return inode;
+ inode = inode_find(table, gfid);
+ if (inode)
+ return inode;
- inode = inode_new (table);
- if (!inode)
- return NULL;
+ inode = inode_new(table);
+ if (!inode)
+ return NULL;
- gf_uuid_copy (inode->gfid, gfid);
+ gf_uuid_copy(inode->gfid, gfid);
- return inode;
+ return inode;
}
-
call_frame_t *
-afr_frame_create (xlator_t *this)
+afr_frame_create(xlator_t *this, int32_t *op_errno)
{
- call_frame_t *frame = NULL;
- afr_local_t *local = NULL;
- int op_errno = 0;
- pid_t pid = GF_CLIENT_PID_AFR_SELF_HEALD;
+ call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
+ pid_t pid = GF_CLIENT_PID_SELF_HEALD;
- frame = create_frame (this, this->ctx->pool);
- if (!frame)
- return NULL;
+ frame = create_frame(this, this->ctx->pool);
+ if (!frame) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ return NULL;
+ }
- local = AFR_FRAME_INIT (frame, op_errno);
- if (!local) {
- STACK_DESTROY (frame->root);
- return NULL;
- }
+ local = AFR_FRAME_INIT(frame, (*op_errno));
+ if (!local) {
+ STACK_DESTROY(frame->root);
+ return NULL;
+ }
- syncopctx_setfspid (&pid);
+ syncopctx_setfspid(&pid);
- frame->root->pid = pid;
+ frame->root->pid = pid;
- afr_set_lk_owner (frame, this, frame->root);
+ afr_set_lk_owner(frame, this, frame->root);
- return frame;
+ return frame;
}
int
-afr_selfheal_newentry_mark (call_frame_t *frame, xlator_t *this, inode_t *inode,
- int source, struct afr_reply *replies,
- unsigned char *sources, unsigned char *newentry)
+afr_selfheal_newentry_mark(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ int source, struct afr_reply *replies,
+ unsigned char *sources, unsigned char *newentry)
{
- int ret = 0;
- int i = 0;
- afr_private_t *priv = NULL;
- dict_t *xattr = NULL;
- int **changelog = NULL;
+ int ret = 0;
+ int i = 0;
+ afr_private_t *priv = NULL;
+ dict_t *xattr = NULL;
+ int **changelog = NULL;
- priv = this->private;
+ priv = this->private;
- gf_uuid_copy (inode->gfid, replies[source].poststat.ia_gfid);
+ gf_uuid_copy(inode->gfid, replies[source].poststat.ia_gfid);
- xattr = dict_new();
- if (!xattr)
- return -ENOMEM;
+ xattr = dict_new();
+ if (!xattr)
+ return -ENOMEM;
- changelog = afr_mark_pending_changelog (priv, newentry, xattr,
- replies[source].poststat.ia_type);
+ changelog = afr_mark_pending_changelog(priv, newentry, xattr,
+ replies[source].poststat.ia_type);
- if (!changelog)
- goto out;
+ if (!changelog) {
+ ret = -ENOMEM;
+ goto out;
+ }
- for (i = 0; i < priv->child_count; i++) {
- if (!sources[i])
- continue;
- afr_selfheal_post_op (frame, this, inode, i, xattr);
- }
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i])
+ continue;
+ ret |= afr_selfheal_post_op(frame, this, inode, i, xattr, NULL);
+ }
out:
- if (changelog)
- afr_matrix_cleanup (changelog, priv->child_count);
- if (xattr)
- dict_unref (xattr);
- return ret;
+ if (changelog)
+ afr_matrix_cleanup(changelog, priv->child_count);
+ if (xattr)
+ dict_unref(xattr);
+ return ret;
}
int
-afr_selfheal_do (call_frame_t *frame, xlator_t *this, uuid_t gfid)
-{
- int ret = -1;
- int entry_ret = 1;
- int metadata_ret = 1;
- int data_ret = 1;
- int or_ret = 0;
- inode_t *inode = NULL;
- gf_boolean_t data_selfheal = _gf_false;
- gf_boolean_t metadata_selfheal = _gf_false;
- gf_boolean_t entry_selfheal = _gf_false;
-
- ret = afr_selfheal_unlocked_inspect (frame, this, gfid, &inode,
- &data_selfheal,
- &metadata_selfheal,
- &entry_selfheal);
- if (ret)
- goto out;
-
- if (!(data_selfheal || metadata_selfheal || entry_selfheal)) {
- ret = 2;
- goto out;
+afr_selfheal_do(call_frame_t *frame, xlator_t *this, uuid_t gfid)
+{
+ int ret = -1;
+ int entry_ret = 1;
+ int metadata_ret = 1;
+ int data_ret = 1;
+ int or_ret = 0;
+ inode_t *inode = NULL;
+ fd_t *fd = NULL;
+ gf_boolean_t data_selfheal = _gf_false;
+ gf_boolean_t metadata_selfheal = _gf_false;
+ gf_boolean_t entry_selfheal = _gf_false;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+
+ ret = afr_selfheal_unlocked_inspect(frame, this, gfid, &inode,
+ &data_selfheal, &metadata_selfheal,
+ &entry_selfheal, NULL);
+ if (ret)
+ goto out;
+
+ if (!(data_selfheal || metadata_selfheal || entry_selfheal)) {
+ ret = 2;
+ goto out;
+ }
+
+ if (inode->ia_type == IA_IFREG) {
+ ret = afr_selfheal_data_open(this, inode, &fd);
+ if (!fd) {
+ ret = -EIO;
+ goto out;
}
+ }
- if (data_selfheal)
- data_ret = afr_selfheal_data (frame, this, inode);
+ if (data_selfheal && priv->data_self_heal)
+ data_ret = afr_selfheal_data(frame, this, fd);
- if (metadata_selfheal)
- metadata_ret = afr_selfheal_metadata (frame, this, inode);
+ if (metadata_selfheal && priv->metadata_self_heal)
+ metadata_ret = afr_selfheal_metadata(frame, this, inode);
- if (entry_selfheal)
- entry_ret = afr_selfheal_entry (frame, this, inode);
+ if (entry_selfheal && priv->entry_self_heal)
+ entry_ret = afr_selfheal_entry(frame, this, inode);
- or_ret = (data_ret | metadata_ret | entry_ret);
+ or_ret = (data_ret | metadata_ret | entry_ret);
- if (data_ret == -EIO || metadata_ret == -EIO || entry_ret == -EIO)
- ret = -EIO;
- else if (data_ret == 1 && metadata_ret == 1 && entry_ret == 1)
- ret = 1;
- else if (or_ret < 0)
- ret = or_ret;
- else
- ret = 0;
+ if (data_ret == -EIO || metadata_ret == -EIO || entry_ret == -EIO)
+ ret = -EIO;
+ else if (data_ret == 1 && metadata_ret == 1 && entry_ret == 1)
+ ret = 1;
+ else if (or_ret < 0)
+ ret = or_ret;
+ else
+ ret = 0;
out:
- if (inode) {
- inode_forget (inode, 1);
- inode_unref (inode);
- }
- return ret;
+ if (inode)
+ inode_unref(inode);
+ if (fd)
+ fd_unref(fd);
+ return ret;
}
/*
* This is the entry point for healing a given GFID. The return values for this
@@ -1358,19 +2593,342 @@ out:
*/
int
-afr_selfheal (xlator_t *this, uuid_t gfid)
+afr_selfheal(xlator_t *this, uuid_t gfid)
+{
+ int ret = -1;
+ call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
+
+ frame = afr_frame_create(this, NULL);
+ if (!frame)
+ return ret;
+
+ local = frame->local;
+ local->xdata_req = dict_new();
+
+ ret = afr_selfheal_do(frame, this, gfid);
+
+ if (frame)
+ AFR_STACK_DESTROY(frame);
+
+ return ret;
+}
+
+afr_local_t *
+__afr_dequeue_heals(afr_private_t *priv)
+{
+ afr_local_t *local = NULL;
+
+ if (list_empty(&priv->heal_waiting))
+ goto none;
+ if ((priv->background_self_heal_count > 0) &&
+ (priv->healers >= priv->background_self_heal_count))
+ goto none;
+
+ local = list_entry(priv->heal_waiting.next, afr_local_t, healer);
+ priv->heal_waiters--;
+ GF_ASSERT(priv->heal_waiters >= 0);
+ list_del_init(&local->healer);
+ list_add(&local->healer, &priv->healing);
+ priv->healers++;
+ return local;
+none:
+ gf_msg_debug(THIS->name, 0,
+ "Nothing dequeued. "
+ "Num healers: %d, Num Waiters: %d",
+ priv->healers, priv->heal_waiters);
+ return NULL;
+}
+
+int
+afr_refresh_selfheal_wrap(void *opaque)
{
- int ret = -1;
- call_frame_t *frame = NULL;
+ call_frame_t *heal_frame = opaque;
+ afr_local_t *local = heal_frame->local;
+ int ret = 0;
- frame = afr_frame_create (this);
- if (!frame)
- return ret;
+ ret = afr_selfheal(heal_frame->this, local->refreshinode->gfid);
+ return ret;
+}
- ret = afr_selfheal_do (frame, this, gfid);
+int
+afr_refresh_heal_done(int ret, call_frame_t *frame, void *opaque)
+{
+ call_frame_t *heal_frame = opaque;
+ xlator_t *this = heal_frame->this;
+ afr_private_t *priv = this->private;
+ afr_local_t *local = heal_frame->local;
+
+ LOCK(&priv->lock);
+ {
+ list_del_init(&local->healer);
+ priv->healers--;
+ GF_ASSERT(priv->healers >= 0);
+ local = __afr_dequeue_heals(priv);
+ }
+ UNLOCK(&priv->lock);
+
+ AFR_STACK_DESTROY(heal_frame);
+
+ if (local)
+ afr_heal_synctask(this, local);
+ return 0;
+}
- if (frame)
- AFR_STACK_DESTROY (frame);
+void
+afr_heal_synctask(xlator_t *this, afr_local_t *local)
+{
+ int ret = 0;
+ call_frame_t *heal_frame = NULL;
+
+ heal_frame = local->heal_frame;
+ ret = synctask_new(this->ctx->env, afr_refresh_selfheal_wrap,
+ afr_refresh_heal_done, heal_frame, heal_frame);
+ if (ret < 0)
+ /* Heal not launched. Will be queued when the next inode
+ * refresh happens and shd hasn't healed it yet. */
+ afr_refresh_heal_done(ret, heal_frame, heal_frame);
+}
- return ret;
+gf_boolean_t
+afr_throttled_selfheal(call_frame_t *frame, xlator_t *this)
+{
+ gf_boolean_t can_heal = _gf_true;
+ afr_private_t *priv = this->private;
+ afr_local_t *local = frame->local;
+
+ LOCK(&priv->lock);
+ {
+ if ((priv->background_self_heal_count > 0) &&
+ (priv->heal_wait_qlen + priv->background_self_heal_count) >
+ (priv->heal_waiters + priv->healers)) {
+ list_add_tail(&local->healer, &priv->heal_waiting);
+ priv->heal_waiters++;
+ local = __afr_dequeue_heals(priv);
+ } else {
+ can_heal = _gf_false;
+ }
+ }
+ UNLOCK(&priv->lock);
+
+ if (can_heal) {
+ if (local)
+ afr_heal_synctask(this, local);
+ else
+ gf_msg_debug(this->name, 0,
+ "Max number of heals are "
+ "pending, background self-heal rejected.");
+ }
+
+ return can_heal;
+}
+
+int
+afr_choose_source_by_policy(afr_private_t *priv, unsigned char *sources,
+ afr_transaction_type type)
+{
+ int source = -1;
+ int i = 0;
+
+ /* Give preference to local child to save on bandwidth */
+ for (i = 0; i < priv->child_count; i++) {
+ if (priv->local[i] && sources[i]) {
+ if ((type == AFR_DATA_TRANSACTION) && AFR_IS_ARBITER_BRICK(priv, i))
+ continue;
+
+ source = i;
+ goto out;
+ }
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (sources[i]) {
+ source = i;
+ goto out;
+ }
+ }
+out:
+ return source;
+}
+
+static int
+afr_anon_inode_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ afr_local_t *local = frame->local;
+ int i = (long)cookie;
+
+ local->replies[i].valid = 1;
+ local->replies[i].op_ret = op_ret;
+ local->replies[i].op_errno = op_errno;
+ if (op_ret == 0) {
+ local->op_ret = 0;
+ local->replies[i].poststat = *buf;
+ local->replies[i].preparent = *preparent;
+ local->replies[i].postparent = *postparent;
+ }
+ if (xdata) {
+ local->replies[i].xdata = dict_ref(xdata);
+ }
+
+ syncbarrier_wake(&local->barrier);
+ return 0;
+}
+
+int
+afr_anon_inode_create(xlator_t *this, int child, inode_t **linked_inode)
+{
+ call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = this->private;
+ unsigned char *mkdir_on = alloca0(priv->child_count);
+ unsigned char *lookup_on = alloca0(priv->child_count);
+ loc_t loc = {0};
+ int32_t op_errno = 0;
+ int32_t child_op_errno = 0;
+ struct iatt iatt = {0};
+ dict_t *xdata = NULL;
+ uuid_t anon_inode_gfid = {0};
+ int mkdir_count = 0;
+ int i = 0;
+
+ /*Try to mkdir everywhere and return success if the dir exists on 'child'
+ */
+
+ if (!priv->use_anon_inode) {
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ frame = afr_frame_create(this, &op_errno);
+ if (op_errno) {
+ goto out;
+ }
+ local = frame->local;
+ if (!local->child_up[child]) {
+ /*Other bricks may need mkdir so don't error out yet*/
+ child_op_errno = ENOTCONN;
+ }
+ gf_uuid_parse(priv->anon_gfid_str, anon_inode_gfid);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->child_up[i])
+ continue;
+
+ if (priv->anon_inode[i]) {
+ mkdir_on[i] = 0;
+ } else {
+ mkdir_on[i] = 1;
+ mkdir_count++;
+ }
+ }
+
+ if (mkdir_count == 0) {
+ *linked_inode = inode_find(this->itable, anon_inode_gfid);
+ if (*linked_inode) {
+ op_errno = 0;
+ goto out;
+ }
+ }
+
+ loc.parent = inode_ref(this->itable->root);
+ loc.name = priv->anon_inode_name;
+ loc.inode = inode_new(this->itable);
+ if (!loc.inode) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ xdata = dict_new();
+ if (!xdata) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ op_errno = -dict_set_gfuuid(xdata, "gfid-req", anon_inode_gfid, _gf_true);
+ if (op_errno) {
+ goto out;
+ }
+
+ if (mkdir_count == 0) {
+ memcpy(lookup_on, local->child_up, priv->child_count);
+ goto lookup;
+ }
+
+ AFR_ONLIST(mkdir_on, frame, afr_anon_inode_mkdir_cbk, mkdir, &loc, 0755, 0,
+ xdata);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!mkdir_on[i]) {
+ continue;
+ }
+
+ if (local->replies[i].op_ret == 0) {
+ priv->anon_inode[i] = 1;
+ iatt = local->replies[i].poststat;
+ } else if (local->replies[i].op_ret < 0 &&
+ local->replies[i].op_errno == EEXIST) {
+ lookup_on[i] = 1;
+ } else if (i == child) {
+ child_op_errno = local->replies[i].op_errno;
+ }
+ }
+
+ if (AFR_COUNT(lookup_on, priv->child_count) == 0) {
+ goto link;
+ }
+
+lookup:
+ AFR_ONLIST(lookup_on, frame, afr_selfheal_discover_cbk, lookup, &loc,
+ xdata);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!lookup_on[i]) {
+ continue;
+ }
+
+ if (local->replies[i].op_ret == 0) {
+ if (gf_uuid_compare(anon_inode_gfid,
+ local->replies[i].poststat.ia_gfid) == 0) {
+ priv->anon_inode[i] = 1;
+ iatt = local->replies[i].poststat;
+ } else {
+ if (i == child)
+ child_op_errno = EINVAL;
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_DATA,
+ "%s has gfid: %s", priv->anon_inode_name,
+ uuid_utoa(local->replies[i].poststat.ia_gfid));
+ }
+ } else if (i == child) {
+ child_op_errno = local->replies[i].op_errno;
+ }
+ }
+link:
+ if (!gf_uuid_is_null(iatt.ia_gfid)) {
+ *linked_inode = inode_link(loc.inode, loc.parent, loc.name, &iatt);
+ if (*linked_inode) {
+ op_errno = 0;
+ inode_lookup(*linked_inode);
+ } else {
+ op_errno = ENOMEM;
+ }
+ goto out;
+ }
+
+out:
+ if (xdata)
+ dict_unref(xdata);
+ loc_wipe(&loc);
+ /*child_op_errno takes precedence*/
+ if (child_op_errno == 0) {
+ child_op_errno = op_errno;
+ }
+
+ if (child_op_errno && *linked_inode) {
+ inode_unref(*linked_inode);
+ *linked_inode = NULL;
+ }
+ if (frame)
+ AFR_STACK_DESTROY(frame);
+ return -child_op_errno;
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index a8a7326e4ec..37bcc2b3f9e 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -8,572 +8,592 @@
cases as published by the Free Software Foundation.
*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "afr.h"
#include "afr-self-heal.h"
-#include "byte-order.h"
+#include <glusterfs/byte-order.h>
#include "protocol-common.h"
-
-enum {
- AFR_SELFHEAL_DATA_FULL = 0,
- AFR_SELFHEAL_DATA_DIFF,
-};
-
+#include "afr-messages.h"
+#include <glusterfs/events.h>
#define HAS_HOLES(i) ((i->ia_blocks * 512) < (i->ia_size))
static int
-__checksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, uint32_t weak, uint8_t *strong,
- dict_t *xdata)
+__checksum_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, uint32_t weak, uint8_t *strong, dict_t *xdata)
{
- afr_local_t *local = NULL;
- int i = (long) cookie;
-
- local = frame->local;
-
- local->replies[i].valid = 1;
- local->replies[i].op_ret = op_ret;
- local->replies[i].op_errno = op_errno;
- if (strong)
- memcpy (local->replies[i].checksum, strong, MD5_DIGEST_LENGTH);
+ afr_local_t *local = NULL;
+ struct afr_reply *replies = NULL;
+ int i = (long)cookie;
+
+ local = frame->local;
+ replies = local->replies;
+
+ replies[i].valid = 1;
+ replies[i].op_ret = op_ret;
+ replies[i].op_errno = op_errno;
+ if (xdata) {
+ replies[i].buf_has_zeroes = dict_get_str_boolean(
+ xdata, "buf-has-zeroes", _gf_false);
+ replies[i].fips_mode_rchecksum = dict_get_str_boolean(
+ xdata, "fips-mode-rchecksum", _gf_false);
+ }
+ if (strong) {
+ if (replies[i].fips_mode_rchecksum) {
+ memcpy(local->replies[i].checksum, strong, SHA256_DIGEST_LENGTH);
+ } else {
+ memcpy(local->replies[i].checksum, strong, MD5_DIGEST_LENGTH);
+ }
+ }
- syncbarrier_wake (&local->barrier);
- return 0;
+ syncbarrier_wake(&local->barrier);
+ return 0;
}
-
-static int
-attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *pre, struct iatt *post,
- dict_t *xdata)
+static gf_boolean_t
+__afr_can_skip_data_block_heal(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int source, unsigned char *healed_sinks,
+ off_t offset, size_t size, struct iatt *poststat)
{
- int i = (long) cookie;
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- local->replies[i].valid = 1;
- local->replies[i].op_ret = op_ret;
- local->replies[i].op_errno = op_errno;
- if (pre)
- local->replies[i].prestat = *pre;
- if (post)
- local->replies[i].poststat = *post;
- if (xdata)
- local->replies[i].xdata = dict_ref (xdata);
-
- syncbarrier_wake (&local->barrier);
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ unsigned char *wind_subvols = NULL;
+ gf_boolean_t checksum_match = _gf_true;
+ struct afr_reply *replies = NULL;
+ dict_t *xdata = NULL;
+ int i = 0;
+
+ priv = this->private;
+ local = frame->local;
+ replies = local->replies;
+
+ xdata = dict_new();
+ if (!xdata)
+ goto out;
+ if (dict_set_int32_sizen(xdata, "check-zero-filled", 1)) {
+ dict_unref(xdata);
+ goto out;
+ }
+
+ wind_subvols = alloca0(priv->child_count);
+ for (i = 0; i < priv->child_count; i++) {
+ if (i == source || healed_sinks[i])
+ wind_subvols[i] = 1;
+ }
+
+ AFR_ONLIST(wind_subvols, frame, __checksum_cbk, rchecksum, fd, offset, size,
+ xdata);
+ if (xdata)
+ dict_unref(xdata);
+
+ if (!replies[source].valid || replies[source].op_ret != 0)
+ return _gf_false;
- return 0;
-}
+ for (i = 0; i < priv->child_count; i++) {
+ if (i == source)
+ continue;
+ if (replies[i].valid) {
+ if (memcmp(replies[source].checksum, replies[i].checksum,
+ replies[source].fips_mode_rchecksum
+ ? SHA256_DIGEST_LENGTH
+ : MD5_DIGEST_LENGTH)) {
+ checksum_match = _gf_false;
+ break;
+ }
+ }
+ }
+ if (checksum_match) {
+ if (HAS_HOLES(poststat))
+ return _gf_true;
-static gf_boolean_t
-__afr_selfheal_data_checksums_match (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int source,
- unsigned char *healed_sinks,
- off_t offset, size_t size)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- unsigned char *wind_subvols = NULL;
- int i = 0;
-
- priv = this->private;
- local = frame->local;
-
- wind_subvols = alloca0 (priv->child_count);
- for (i = 0; i < priv->child_count; i++) {
- if (i == source || healed_sinks[i])
- wind_subvols[i] = 1;
- }
-
- AFR_ONLIST (wind_subvols, frame, __checksum_cbk, rchecksum, fd,
- offset, size, NULL);
-
- if (!local->replies[source].valid || local->replies[source].op_ret != 0)
- return _gf_false;
-
- for (i = 0; i < priv->child_count; i++) {
- if (i == source)
- continue;
- if (memcmp (local->replies[source].checksum,
- local->replies[i].checksum,
- MD5_DIGEST_LENGTH))
- return _gf_false;
- }
-
- return _gf_true;
+ /* For non-sparse files, we might be better off writing the
+ * zeroes to sinks to avoid mismatch of disk-usage in bricks. */
+ if (local->replies[source].buf_has_zeroes)
+ return _gf_false;
+ else
+ return _gf_true;
+ }
+out:
+ return _gf_false;
}
-
static gf_boolean_t
-__afr_is_sink_zero_filled (xlator_t *this, fd_t *fd, size_t size,
- off_t offset, int sink)
+__afr_is_sink_zero_filled(xlator_t *this, fd_t *fd, size_t size, off_t offset,
+ int sink)
{
- afr_private_t *priv = NULL;
- struct iobref *iobref = NULL;
- struct iovec *iovec = NULL;
- int count = 0;
- int ret = 0;
- gf_boolean_t zero_filled = _gf_false;
-
- priv = this->private;
- ret = syncop_readv (priv->children[sink], fd, size, offset, 0, &iovec,
- &count, &iobref, NULL, NULL);
- if (ret < 0)
- goto out;
- ret = iov_0filled (iovec, count);
- if (!ret)
- zero_filled = _gf_true;
+ afr_private_t *priv = NULL;
+ struct iobref *iobref = NULL;
+ struct iovec *iovec = NULL;
+ int count = 0;
+ int ret = 0;
+ gf_boolean_t zero_filled = _gf_false;
+
+ priv = this->private;
+ ret = syncop_readv(priv->children[sink], fd, size, offset, 0, &iovec,
+ &count, &iobref, NULL, NULL, NULL);
+ if (ret < 0)
+ goto out;
+ ret = iov_0filled(iovec, count);
+ if (!ret)
+ zero_filled = _gf_true;
out:
- if (iovec)
- GF_FREE (iovec);
- if (iobref)
- iobref_unref (iobref);
- return zero_filled;
+ if (iovec)
+ GF_FREE(iovec);
+ if (iobref)
+ iobref_unref(iobref);
+ return zero_filled;
}
static int
-__afr_selfheal_data_read_write (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int source, unsigned char *healed_sinks,
- off_t offset, size_t size,
- struct afr_reply *replies, int type)
+__afr_selfheal_data_read_write(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int source, unsigned char *healed_sinks,
+ off_t offset, size_t size,
+ struct afr_reply *replies, int type)
{
- struct iovec *iovec = NULL;
- int count = 0;
- struct iobref *iobref = NULL;
- int ret = 0;
- int i = 0;
- afr_private_t *priv = NULL;
-
- priv = this->private;
-
- ret = syncop_readv (priv->children[source], fd, size, offset, 0,
- &iovec, &count, &iobref, NULL, NULL);
- if (ret <= 0)
- return ret;
-
- for (i = 0; i < priv->child_count; i++) {
- if (!healed_sinks[i])
- continue;
-
- /*
- * TODO: Use fiemap() and discard() to heal holes
- * in the future.
- *
- * For now,
- *
- * - if the source had any holes at all,
- * AND
- * - if we are writing past the original file size
- * of the sink
- * AND
- * - is NOT the last block of the source file. if
- * the block contains EOF, it has to be written
- * in order to set the file size even if the
- * last block is 0-filled.
- * AND
- * - if the read buffer is filled with only 0's
- *
- * then, skip writing to this source. We don't depend
- * on the write to happen to update the size as we
- * have performed an ftruncate() upfront anyways.
- */
-#define is_last_block(o,b,s) ((s >= o) && (s <= (o + b)))
- if (HAS_HOLES ((&replies[source].poststat)) &&
- offset >= replies[i].poststat.ia_size &&
- !is_last_block (offset, size,
- replies[source].poststat.ia_size) &&
- (iov_0filled (iovec, count) == 0))
- continue;
-
- /* Avoid filling up sparse regions of the sink with 0-filled
- * writes.*/
- if (type == AFR_SELFHEAL_DATA_FULL &&
- HAS_HOLES ((&replies[source].poststat)) &&
- ((offset + size) <= replies[i].poststat.ia_size) &&
- (iov_0filled (iovec, count) == 0) &&
- __afr_is_sink_zero_filled (this, fd, size, offset, i)) {
- continue;
- }
-
- ret = syncop_writev (priv->children[i], fd, iovec, count,
- offset, iobref, 0, NULL, NULL);
- if (ret != iov_length (iovec, count)) {
- /* write() failed on this sink. unset the corresponding
- member in sinks[] (which is healed_sinks[] in the
- caller) so that this server does NOT get considered
- as successfully healed.
- */
- healed_sinks[i] = 0;
- }
- }
- if (iovec)
- GF_FREE (iovec);
- if (iobref)
- iobref_unref (iobref);
-
- return ret;
-}
+ struct iovec *iovec = NULL;
+ int count = 0;
+ struct iobref *iobref = NULL;
+ int ret = 0;
+ int i = 0;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+
+ ret = syncop_readv(priv->children[source], fd, size, offset, 0, &iovec,
+ &count, &iobref, NULL, NULL, NULL);
+ if (ret <= 0)
+ return ret;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!healed_sinks[i])
+ continue;
+
+ /*
+ * TODO: Use fiemap() and discard() to heal holes
+ * in the future.
+ *
+ * For now,
+ *
+ * - if the source had any holes at all,
+ * AND
+ * - if we are writing past the original file size
+ * of the sink
+ * AND
+ * - is NOT the last block of the source file. if
+ * the block contains EOF, it has to be written
+ * in order to set the file size even if the
+ * last block is 0-filled.
+ * AND
+ * - if the read buffer is filled with only 0's
+ *
+ * then, skip writing to this source. We don't depend
+ * on the write to happen to update the size as we
+ * have performed an ftruncate() upfront anyways.
+ */
+#define is_last_block(o, b, s) ((s >= o) && (s <= (o + b)))
+ if (HAS_HOLES((&replies[source].poststat)) &&
+ offset >= replies[i].poststat.ia_size &&
+ !is_last_block(offset, size, replies[source].poststat.ia_size) &&
+ (iov_0filled(iovec, count) == 0))
+ continue;
+
+ /* Avoid filling up sparse regions of the sink with 0-filled
+ * writes.*/
+ if (type == AFR_SELFHEAL_DATA_FULL &&
+ HAS_HOLES((&replies[source].poststat)) &&
+ ((offset + size) <= replies[i].poststat.ia_size) &&
+ (iov_0filled(iovec, count) == 0) &&
+ __afr_is_sink_zero_filled(this, fd, size, offset, i)) {
+ continue;
+ }
+
+ ret = syncop_writev(priv->children[i], fd, iovec, count, offset, iobref,
+ 0, NULL, NULL, NULL, NULL);
+ if (ret != iov_length(iovec, count)) {
+ /* write() failed on this sink. unset the corresponding
+ member in sinks[] (which is healed_sinks[] in the
+ caller) so that this server does NOT get considered
+ as successfully healed.
+ */
+ healed_sinks[i] = 0;
+ }
+ }
+ if (iovec)
+ GF_FREE(iovec);
+ if (iobref)
+ iobref_unref(iobref);
+ return ret;
+}
-static int
-afr_selfheal_data_block (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int source, unsigned char *healed_sinks, off_t offset,
- size_t size, int type, struct afr_reply *replies)
+static gf_boolean_t
+afr_source_sinks_locked(xlator_t *this, unsigned char *locked_on, int source,
+ unsigned char *healed_sinks)
{
- int ret = -1;
- int sink_count = 0;
- afr_private_t *priv = NULL;
- unsigned char *data_lock = NULL;
-
- priv = this->private;
- sink_count = AFR_COUNT (healed_sinks, priv->child_count);
- data_lock = alloca0 (priv->child_count);
-
- ret = afr_selfheal_inodelk (frame, this, fd->inode, this->name,
- offset, size, data_lock);
- {
- if (ret < sink_count) {
- ret = -ENOTCONN;
- goto unlock;
- }
-
- if (type == AFR_SELFHEAL_DATA_DIFF &&
- __afr_selfheal_data_checksums_match (frame, this, fd, source,
- healed_sinks, offset, size)) {
- ret = 0;
- goto unlock;
- }
-
- ret = __afr_selfheal_data_read_write (frame, this, fd, source,
- healed_sinks, offset, size,
- replies, type);
- }
-unlock:
- afr_selfheal_uninodelk (frame, this, fd->inode, this->name,
- offset, size, data_lock);
- return ret;
-}
+ afr_private_t *priv = this->private;
+ int i = 0;
+ if (!locked_on[source])
+ return _gf_false;
+ for (i = 0; i < priv->child_count; i++) {
+ if (healed_sinks[i] && locked_on[i])
+ return _gf_true;
+ }
+
+ return _gf_false;
+}
static int
-afr_selfheal_data_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd,
- unsigned char *healed_sinks)
+afr_selfheal_data_block(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int source, unsigned char *healed_sinks, off_t offset,
+ size_t size, int type, struct afr_reply *replies)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- AFR_ONLIST (healed_sinks, frame, attr_cbk, fsync, fd, 0, NULL);
-
- for (i = 0; i < priv->child_count; i++)
- if (healed_sinks[i] && local->replies[i].op_ret != 0)
- /* fsync() failed. Do NOT consider this server
- as successfully healed. Mark it so.
- */
- healed_sinks[i] = 0;
- return 0;
-}
+ int ret = -1;
+ afr_private_t *priv = NULL;
+ unsigned char *data_lock = NULL;
+
+ priv = this->private;
+ data_lock = alloca0(priv->child_count);
+
+ ret = afr_selfheal_inodelk(frame, this, fd->inode, this->name, offset, size,
+ data_lock);
+ {
+ if (!afr_source_sinks_locked(this, data_lock, source, healed_sinks)) {
+ ret = -ENOTCONN;
+ goto unlock;
+ }
+ if (type == AFR_SELFHEAL_DATA_DIFF &&
+ __afr_can_skip_data_block_heal(frame, this, fd, source,
+ healed_sinks, offset, size,
+ &replies[source].poststat)) {
+ ret = 0;
+ goto unlock;
+ }
+
+ ret = __afr_selfheal_data_read_write(
+ frame, this, fd, source, healed_sinks, offset, size, replies, type);
+ }
+unlock:
+ afr_selfheal_uninodelk(frame, this, fd->inode, this->name, offset, size,
+ data_lock);
+ return ret;
+}
static int
-afr_selfheal_data_restore_time (call_frame_t *frame, xlator_t *this,
- inode_t *inode, int source,
- unsigned char *healed_sinks,
- struct afr_reply *replies)
+afr_selfheal_data_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ unsigned char *healed_sinks)
{
- loc_t loc = {0, };
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
- loc.inode = inode_ref (inode);
- gf_uuid_copy (loc.gfid, inode->gfid);
+ local = frame->local;
+ priv = this->private;
- AFR_ONLIST (healed_sinks, frame, attr_cbk, setattr, &loc,
- &replies[source].poststat,
- (GF_SET_ATTR_ATIME|GF_SET_ATTR_MTIME), NULL);
+ if (!priv->ensure_durability)
+ return 0;
- loc_wipe (&loc);
+ AFR_ONLIST(healed_sinks, frame, afr_sh_generic_fop_cbk, fsync, fd, 0, NULL);
- return 0;
+ for (i = 0; i < priv->child_count; i++)
+ if (healed_sinks[i] && local->replies[i].op_ret != 0)
+ /* fsync() failed. Do NOT consider this server
+ as successfully healed. Mark it so.
+ */
+ healed_sinks[i] = 0;
+ return 0;
}
static int
-afr_data_self_heal_type_get (afr_private_t *priv, unsigned char *healed_sinks,
- int source, struct afr_reply *replies)
+afr_data_self_heal_type_get(afr_private_t *priv, unsigned char *healed_sinks,
+ int source, struct afr_reply *replies)
{
- int type = AFR_SELFHEAL_DATA_FULL;
- int i = 0;
-
- if (priv->data_self_heal_algorithm == NULL) {
- type = AFR_SELFHEAL_DATA_FULL;
- for (i = 0; i < priv->child_count; i++) {
- if (!healed_sinks[i] && i != source)
- continue;
- if (replies[i].poststat.ia_size) {
- type = AFR_SELFHEAL_DATA_DIFF;
- break;
- }
- }
- } else if (strcmp (priv->data_self_heal_algorithm, "full") == 0) {
- type = AFR_SELFHEAL_DATA_FULL;
- } else if (strcmp (priv->data_self_heal_algorithm, "diff") == 0) {
+ int type = AFR_SELFHEAL_DATA_FULL;
+ int i = 0;
+
+ if (priv->data_self_heal_algorithm == AFR_SELFHEAL_DATA_DYNAMIC) {
+ type = AFR_SELFHEAL_DATA_FULL;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!healed_sinks[i] && i != source)
+ continue;
+ if (replies[i].poststat.ia_size) {
type = AFR_SELFHEAL_DATA_DIFF;
+ break;
+ }
}
- return type;
+ } else {
+ type = priv->data_self_heal_algorithm;
+ }
+ return type;
}
static int
-afr_selfheal_data_do (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int source, unsigned char *healed_sinks,
- struct afr_reply *replies)
+afr_selfheal_data_do(call_frame_t *frame, xlator_t *this, fd_t *fd, int source,
+ unsigned char *healed_sinks, struct afr_reply *replies)
{
- afr_private_t *priv = NULL;
- off_t off = 0;
- size_t block = 128 * 1024;
- int type = AFR_SELFHEAL_DATA_FULL;
- int ret = -1;
- call_frame_t *iter_frame = NULL;
-
- priv = this->private;
-
- type = afr_data_self_heal_type_get (priv, healed_sinks, source,
- replies);
-
- iter_frame = afr_copy_frame (frame);
- if (!iter_frame)
- return -ENOMEM;
-
- for (off = 0; off < replies[source].poststat.ia_size; off += block) {
- if (AFR_COUNT (healed_sinks, priv->child_count) == 0) {
- ret = -ENOTCONN;
- goto out;
- }
-
- ret = afr_selfheal_data_block (iter_frame, this, fd, source,
- healed_sinks, off, block, type,
- replies);
- if (ret < 0)
- goto out;
+ afr_private_t *priv = NULL;
+ off_t off = 0;
+ size_t block = 0;
+ int type = AFR_SELFHEAL_DATA_FULL;
+ int ret = -1;
+ call_frame_t *iter_frame = NULL;
+ unsigned char arbiter_sink_status = 0;
+
+ gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_SELF_HEAL_INFO,
+ "performing data selfheal on %s", uuid_utoa(fd->inode->gfid));
+
+ priv = this->private;
+ if (priv->arbiter_count) {
+ arbiter_sink_status = healed_sinks[ARBITER_BRICK_INDEX];
+ healed_sinks[ARBITER_BRICK_INDEX] = 0;
+ }
+
+ block = 128 * 1024 * priv->data_self_heal_window_size;
+
+ type = afr_data_self_heal_type_get(priv, healed_sinks, source, replies);
+
+ iter_frame = afr_copy_frame(frame);
+ if (!iter_frame) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ for (off = 0; off < replies[source].poststat.ia_size; off += block) {
+ if (AFR_COUNT(healed_sinks, priv->child_count) == 0) {
+ ret = -ENOTCONN;
+ goto out;
+ }
- AFR_STACK_RESET (iter_frame);
- }
+ ret = afr_selfheal_data_block(iter_frame, this, fd, source,
+ healed_sinks, off, block, type, replies);
+ if (ret < 0)
+ goto out;
- afr_selfheal_data_restore_time (frame, this, fd->inode, source,
- healed_sinks, replies);
+ AFR_STACK_RESET(iter_frame);
+ if (iter_frame->local == NULL) {
+ ret = -ENOTCONN;
+ goto out;
+ }
+ }
- ret = afr_selfheal_data_fsync (frame, this, fd, healed_sinks);
+ ret = afr_selfheal_data_fsync(frame, this, fd, healed_sinks);
out:
- if (iter_frame)
- AFR_STACK_DESTROY (iter_frame);
- return ret;
-}
+ if (arbiter_sink_status)
+ healed_sinks[ARBITER_BRICK_INDEX] = arbiter_sink_status;
+ if (iter_frame)
+ AFR_STACK_DESTROY(iter_frame);
+ return ret;
+}
static int
-__afr_selfheal_truncate_sinks (call_frame_t *frame, xlator_t *this,
- fd_t *fd, unsigned char *healed_sinks,
- struct afr_reply *replies, uint64_t size)
+__afr_selfheal_truncate_sinks(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ unsigned char *healed_sinks, uint64_t size)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- unsigned char *larger_sinks = 0;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- larger_sinks = alloca0 (priv->child_count);
- for (i = 0; i < priv->child_count; i++) {
- if (healed_sinks[i] && replies[i].poststat.ia_size > size)
- larger_sinks[i] = 1;
- }
-
- AFR_ONLIST (larger_sinks, frame, attr_cbk, ftruncate, fd, size, NULL);
-
- for (i = 0; i < priv->child_count; i++)
- if (healed_sinks[i] && local->replies[i].op_ret == -1)
- /* truncate() failed. Do NOT consider this server
- as successfully healed. Mark it so.
- */
- healed_sinks[i] = 0;
- return 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ /* This will send truncate on the arbiter brick as well if it is marked as
+ * sink. If changelog is enabled on the volume it captures truncate as a
+ * data transactions on the arbiter brick. This will help geo-rep to
+ * properly sync the data from master to slave if arbiter is the ACTIVE
+ * brick during syncing and which had got some entries healed for data as
+ * part of self heal.
+ */
+ AFR_ONLIST(healed_sinks, frame, afr_sh_generic_fop_cbk, ftruncate, fd, size,
+ NULL);
+
+ for (i = 0; i < priv->child_count; i++)
+ if (healed_sinks[i] && local->replies[i].op_ret == -1)
+ /* truncate() failed. Do NOT consider this server
+ as successfully healed. Mark it so.
+ */
+ healed_sinks[i] = 0;
+
+ return 0;
}
gf_boolean_t
-afr_has_source_witnesses (xlator_t *this, unsigned char *sources,
- uint64_t *witness)
+afr_has_source_witnesses(xlator_t *this, unsigned char *sources,
+ uint64_t *witness)
{
- int i = 0;
- afr_private_t *priv = NULL;
+ int i = 0;
+ afr_private_t *priv = NULL;
- priv = this->private;
+ priv = this->private;
- for (i = 0; i < priv->child_count; i++) {
- if (sources[i] && witness[i])
- return _gf_true;
- }
- return _gf_false;
+ for (i = 0; i < priv->child_count; i++) {
+ if (sources[i] && witness[i])
+ return _gf_true;
+ }
+ return _gf_false;
}
static gf_boolean_t
-afr_does_size_mismatch (xlator_t *this, unsigned char *sources,
- struct afr_reply *replies)
+afr_does_size_mismatch(xlator_t *this, unsigned char *sources,
+ struct afr_reply *replies)
{
- int i = 0;
- afr_private_t *priv = NULL;
- struct iatt *min = NULL;
- struct iatt *max = NULL;
+ int i = 0;
+ afr_private_t *priv = NULL;
+ struct iatt *min = NULL;
+ struct iatt *max = NULL;
- priv = this->private;
+ priv = this->private;
- for (i = 0; i < priv->child_count; i++) {
- if (!replies[i].valid)
- continue;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
- if (replies[i].op_ret < 0)
- continue;
+ if (replies[i].op_ret < 0)
+ continue;
- if (!min)
- min = &replies[i].poststat;
+ if (!sources[i])
+ continue;
- if (!max)
- max = &replies[i].poststat;
+ if (AFR_IS_ARBITER_BRICK(priv, i) && (replies[i].poststat.ia_size == 0))
+ continue;
- if (min->ia_size > replies[i].poststat.ia_size)
- min = &replies[i].poststat;
+ if (!min)
+ min = &replies[i].poststat;
- if (max->ia_size < replies[i].poststat.ia_size)
- max = &replies[i].poststat;
- }
+ if (!max)
+ max = &replies[i].poststat;
- if (min && max) {
- if (min->ia_size != max->ia_size)
- return _gf_true;
- }
+ if (min->ia_size > replies[i].poststat.ia_size)
+ min = &replies[i].poststat;
- return _gf_false;
+ if (max->ia_size < replies[i].poststat.ia_size)
+ max = &replies[i].poststat;
+ }
+
+ if (min && max) {
+ if (min->ia_size != max->ia_size)
+ return _gf_true;
+ }
+
+ return _gf_false;
}
static void
-afr_mark_biggest_witness_as_source (xlator_t *this, unsigned char *sources,
- uint64_t *witness)
+afr_mark_biggest_witness_as_source(xlator_t *this, unsigned char *sources,
+ uint64_t *witness)
{
- int i = 0;
- afr_private_t *priv = NULL;
- uint64_t biggest_witness = 0;
-
- priv = this->private;
- /* Find source with biggest witness count */
- for (i = 0; i < priv->child_count; i++) {
- if (!sources[i])
- continue;
- if (biggest_witness < witness[i])
- biggest_witness = witness[i];
- }
-
- /* Mark files with less witness count as not source */
- for (i = 0; i < priv->child_count; i++) {
- if (!sources[i])
- continue;
- if (witness[i] < biggest_witness)
- sources[i] = 0;
- }
-
- return;
+ int i = 0;
+ afr_private_t *priv = NULL;
+ uint64_t biggest_witness = 0;
+
+ priv = this->private;
+ /* Find source with biggest witness count */
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i])
+ continue;
+ if (biggest_witness < witness[i])
+ biggest_witness = witness[i];
+ }
+
+ /* Mark files with less witness count as not source */
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i])
+ continue;
+ if (witness[i] < biggest_witness)
+ sources[i] = 0;
+ }
+
+ return;
}
/* This is a tie breaker function. Only one source be assigned here */
static void
-afr_mark_newest_file_as_source (xlator_t *this, unsigned char *sources,
- struct afr_reply *replies)
+afr_mark_newest_file_as_source(xlator_t *this, unsigned char *sources,
+ struct afr_reply *replies)
{
- int i = 0;
- afr_private_t *priv = NULL;
- int source = -1;
- uint32_t max_ctime = 0;
-
- priv = this->private;
- /* Find source with latest ctime */
- for (i = 0; i < priv->child_count; i++) {
- if (!sources[i])
- continue;
-
- if (max_ctime <= replies[i].poststat.ia_ctime) {
- source = i;
- max_ctime = replies[i].poststat.ia_ctime;
- }
+ int i = 0;
+ afr_private_t *priv = NULL;
+ int source = -1;
+ uint32_t max_ctime = 0;
+
+ priv = this->private;
+ /* Find source with latest ctime */
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i])
+ continue;
+
+ if (max_ctime <= replies[i].poststat.ia_ctime) {
+ source = i;
+ max_ctime = replies[i].poststat.ia_ctime;
}
+ }
- /* Only mark one of the files as source to break ties */
- memset (sources, 0, sizeof (*sources) * priv->child_count);
- sources[source] = 1;
+ /* Only mark one of the files as source to break ties */
+ memset(sources, 0, sizeof(*sources) * priv->child_count);
+ sources[source] = 1;
}
static int
-__afr_selfheal_data_finalize_source (call_frame_t *frame, xlator_t *this,
- unsigned char *sources,
- unsigned char *sinks,
- unsigned char *healed_sinks,
- unsigned char *locked_on,
- struct afr_reply *replies,
- uint64_t *witness)
+__afr_selfheal_data_finalize_source(
+ call_frame_t *frame, xlator_t *this, inode_t *inode, unsigned char *sources,
+ unsigned char *sinks, unsigned char *healed_sinks, unsigned char *locked_on,
+ unsigned char *undid_pending, struct afr_reply *replies, uint64_t *witness)
{
- int i = 0;
- afr_private_t *priv = NULL;
- int source = -1;
- int sources_count = 0;
- priv = this->private;
-
- sources_count = AFR_COUNT (sources, priv->child_count);
-
- if ((AFR_CMP (locked_on, healed_sinks, priv->child_count) == 0)
- || !sources_count) {
- /* split brain */
- source = afr_mark_split_brain_source_sinks (frame, this,
- sources, sinks,
- healed_sinks,
- locked_on, replies,
- AFR_DATA_TRANSACTION);
- if (source < 0)
- return -EIO;
- return source;
- }
-
- /* No split brain at this point. If we were called from
- * afr_heal_splitbrain_file(), abort.*/
- if (afr_dict_contains_heal_op(frame))
- return -EIO;
-
- /* If there are no witnesses/size-mismatches on sources we are done*/
- if (!afr_does_size_mismatch (this, sources, replies) &&
- !afr_has_source_witnesses (this, sources, witness))
- goto out;
-
- afr_mark_largest_file_as_source (this, sources, replies);
- afr_mark_biggest_witness_as_source (this, sources, witness);
- afr_mark_newest_file_as_source (this, sources, replies);
+ afr_private_t *priv = NULL;
+ int source = -1;
+ int sources_count = 0;
+ priv = this->private;
+
+ sources_count = AFR_COUNT(sources, priv->child_count);
+
+ if ((AFR_CMP(locked_on, healed_sinks, priv->child_count) == 0) ||
+ !sources_count) {
+ /* split brain */
+ source = afr_mark_split_brain_source_sinks(
+ frame, this, inode, sources, sinks, healed_sinks, locked_on,
+ replies, AFR_DATA_TRANSACTION);
+ if (source < 0) {
+ gf_event(EVENT_AFR_SPLIT_BRAIN,
+ "client-pid=%d;"
+ "subvol=%s;type=data;"
+ "file=%s",
+ this->ctx->cmd_args.client_pid, this->name,
+ uuid_utoa(inode->gfid));
+ return -EIO;
+ }
+
+ _afr_fav_child_reset_sink_xattrs(
+ frame, this, inode, source, healed_sinks, undid_pending,
+ AFR_DATA_TRANSACTION, locked_on, replies);
+ goto out;
+ }
+
+ /* No split brain at this point. If we were called from
+ * afr_heal_splitbrain_file(), abort.*/
+ if (afr_dict_contains_heal_op(frame))
+ return -EIO;
+
+ /* If there are no witnesses/size-mismatches on sources we are done*/
+ if (!afr_does_size_mismatch(this, sources, replies) &&
+ !afr_has_source_witnesses(this, sources, witness))
+ goto out;
+
+ afr_mark_largest_file_as_source(this, sources, replies);
+ afr_mark_biggest_witness_as_source(this, sources, witness);
+ afr_mark_newest_file_as_source(this, sources, replies);
+ if (priv->arbiter_count)
+ /* Choose non-arbiter brick as source for empty files. */
+ afr_mark_source_sinks_if_file_empty(this, sources, sinks, healed_sinks,
+ locked_on, replies,
+ AFR_DATA_TRANSACTION);
out:
- afr_mark_active_sinks (this, sources, locked_on, healed_sinks);
+ afr_mark_active_sinks(this, sources, locked_on, healed_sinks);
+ source = afr_choose_source_by_policy(priv, sources, AFR_DATA_TRANSACTION);
- for (i = 0; i < priv->child_count; i++) {
- if (sources[i]) {
- source = i;
- break;
- }
- }
- return source;
+ return source;
}
/*
@@ -586,225 +606,286 @@ out:
* for self-healing, or -1 if no healing is necessary/split brain.
*/
int
-__afr_selfheal_data_prepare (call_frame_t *frame, xlator_t *this,
- inode_t *inode, unsigned char *locked_on,
- unsigned char *sources, unsigned char *sinks,
- unsigned char *healed_sinks,
- struct afr_reply *replies)
+__afr_selfheal_data_prepare(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ unsigned char *locked_on, unsigned char *sources,
+ unsigned char *sinks, unsigned char *healed_sinks,
+ unsigned char *undid_pending,
+ struct afr_reply *replies, unsigned char *pflag)
{
- int ret = -1;
- int source = -1;
- afr_private_t *priv = NULL;
- uint64_t *witness = NULL;
-
- priv = this->private;
-
- ret = afr_selfheal_unlocked_discover (frame, inode, inode->gfid,
- replies);
-
- if (ret)
- return ret;
-
- witness = alloca0(priv->child_count * sizeof (*witness));
- ret = afr_selfheal_find_direction (frame, this, replies,
- AFR_DATA_TRANSACTION,
- locked_on, sources, sinks, witness);
- if (ret)
- return ret;
-
- /* Initialize the healed_sinks[] array optimistically to
- the intersection of to-be-healed (i.e sinks[]) and
- the list of servers which are up (i.e locked_on[]).
- As we encounter failures in the healing process, we
- will unmark the respective servers in the healed_sinks[]
- array.
- */
- AFR_INTERSECT (healed_sinks, sinks, locked_on, priv->child_count);
-
- source = __afr_selfheal_data_finalize_source (frame, this, sources,
- sinks, healed_sinks,
- locked_on, replies,
- witness);
- if (source < 0)
- return -EIO;
-
- return source;
+ int ret = -1;
+ int source = -1;
+ afr_private_t *priv = NULL;
+ uint64_t *witness = NULL;
+
+ priv = this->private;
+
+ ret = afr_selfheal_unlocked_discover(frame, inode, inode->gfid, replies);
+
+ if (ret)
+ return ret;
+
+ witness = alloca0(priv->child_count * sizeof(*witness));
+ ret = afr_selfheal_find_direction(frame, this, replies,
+ AFR_DATA_TRANSACTION, locked_on, sources,
+ sinks, witness, pflag);
+ if (ret)
+ return ret;
+
+ /* Initialize the healed_sinks[] array optimistically to
+ the intersection of to-be-healed (i.e sinks[]) and
+ the list of servers which are up (i.e locked_on[]).
+ As we encounter failures in the healing process, we
+ will unmark the respective servers in the healed_sinks[]
+ array.
+ */
+ AFR_INTERSECT(healed_sinks, sinks, locked_on, priv->child_count);
+
+ source = __afr_selfheal_data_finalize_source(
+ frame, this, inode, sources, sinks, healed_sinks, locked_on,
+ undid_pending, replies, witness);
+ if (source < 0)
+ return -EIO;
+
+ return source;
}
-
static int
-__afr_selfheal_data (call_frame_t *frame, xlator_t *this, fd_t *fd,
- unsigned char *locked_on)
+__afr_selfheal_data(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ unsigned char *locked_on)
{
- afr_private_t *priv = NULL;
- int ret = -1;
- unsigned char *sources = NULL;
- unsigned char *sinks = NULL;
- unsigned char *data_lock = NULL;
- unsigned char *healed_sinks = NULL;
- struct afr_reply *locked_replies = NULL;
- int source = -1;
- gf_boolean_t compat = _gf_false;
- gf_boolean_t did_sh = _gf_true;
- unsigned char *compat_lock = NULL;
-
- priv = this->private;
-
- sources = alloca0 (priv->child_count);
- sinks = alloca0 (priv->child_count);
- healed_sinks = alloca0 (priv->child_count);
- data_lock = alloca0 (priv->child_count);
- compat_lock = alloca0 (priv->child_count);
-
- locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count);
-
- ret = afr_selfheal_inodelk (frame, this, fd->inode, this->name, 0, 0,
- data_lock);
- {
- if (ret < AFR_SH_MIN_PARTICIPANTS) {
- gf_log (this->name, GF_LOG_DEBUG, "%s: Skipping "
- "self-heal as only %d number of subvolumes "
- "could be locked", uuid_utoa (fd->inode->gfid),
- ret);
- ret = -ENOTCONN;
- goto unlock;
- }
-
- ret = __afr_selfheal_data_prepare (frame, this, fd->inode,
- data_lock, sources, sinks,
- healed_sinks,
- locked_replies);
- if (ret < 0)
- goto unlock;
-
- if (AFR_COUNT(healed_sinks, priv->child_count) == 0) {
- did_sh = _gf_false;
- goto unlock;
- }
-
- source = ret;
-
- ret = __afr_selfheal_truncate_sinks (frame, this, fd, healed_sinks,
- locked_replies,
- locked_replies[source].poststat.ia_size);
- if (ret < 0)
- goto unlock;
-
- ret = 0;
-
- /* Locking from (LLONG_MAX - 2) to (LLONG_MAX - 1) is for
- compatibility with older self-heal clients which do not
- hold a lock in the @priv->sh_domain domain to guard
- against concurrent ongoing self-heals
- */
- afr_selfheal_inodelk (frame, this, fd->inode, this->name,
- LLONG_MAX - 2, 1, compat_lock);
- compat = _gf_true;
- }
-unlock:
- afr_selfheal_uninodelk (frame, this, fd->inode, this->name, 0, 0,
- data_lock);
+ afr_private_t *priv = NULL;
+ int ret = -1;
+ unsigned char *sources = NULL;
+ unsigned char *sinks = NULL;
+ unsigned char *data_lock = NULL;
+ unsigned char *healed_sinks = NULL;
+ unsigned char *undid_pending = NULL;
+ struct afr_reply *locked_replies = NULL;
+ int source = -1;
+ gf_boolean_t did_sh = _gf_true;
+ gf_boolean_t is_arbiter_the_only_sink = _gf_false;
+ gf_boolean_t empty_file = _gf_false;
+
+ priv = this->private;
+
+ sources = alloca0(priv->child_count);
+ sinks = alloca0(priv->child_count);
+ healed_sinks = alloca0(priv->child_count);
+ data_lock = alloca0(priv->child_count);
+ undid_pending = alloca0(priv->child_count);
+
+ locked_replies = alloca0(sizeof(*locked_replies) * priv->child_count);
+
+ ret = afr_selfheal_inodelk(frame, this, fd->inode, this->name, 0, 0,
+ data_lock);
+ {
+ if (ret < priv->child_count) {
+ gf_msg_debug(this->name, 0,
+ "%s: Skipping "
+ "self-heal as only %d number "
+ "of subvolumes "
+ "could be locked",
+ uuid_utoa(fd->inode->gfid), ret);
+ ret = -ENOTCONN;
+ goto unlock;
+ }
+
+ ret = __afr_selfheal_data_prepare(frame, this, fd->inode, data_lock,
+ sources, sinks, healed_sinks,
+ undid_pending, locked_replies, NULL);
if (ret < 0)
- goto out;
+ goto unlock;
- if (!did_sh)
- goto out;
+ if (AFR_COUNT(healed_sinks, priv->child_count) == 0) {
+ did_sh = _gf_false;
+ goto unlock;
+ }
+
+ source = ret;
- ret = afr_selfheal_data_do (frame, this, fd, source, healed_sinks,
- locked_replies);
- if (ret)
- goto out;
+ if (AFR_IS_ARBITER_BRICK(priv, source)) {
+ empty_file = afr_is_file_empty_on_all_children(priv,
+ locked_replies);
+ if (empty_file)
+ goto restore_time;
- ret = afr_selfheal_undo_pending (frame, this, fd->inode, sources, sinks,
- healed_sinks, AFR_DATA_TRANSACTION,
- locked_replies, data_lock);
+ did_sh = _gf_false;
+ goto unlock;
+ }
+
+ ret = __afr_selfheal_truncate_sinks(
+ frame, this, fd, healed_sinks,
+ locked_replies[source].poststat.ia_size);
+ if (ret < 0)
+ goto unlock;
+
+ if (priv->arbiter_count &&
+ AFR_COUNT(healed_sinks, priv->child_count) == 1 &&
+ healed_sinks[ARBITER_BRICK_INDEX]) {
+ is_arbiter_the_only_sink = _gf_true;
+ goto restore_time;
+ }
+ ret = 0;
+ }
+unlock:
+ afr_selfheal_uninodelk(frame, this, fd->inode, this->name, 0, 0, data_lock);
+ if (ret < 0)
+ goto out;
+
+ if (!did_sh)
+ goto out;
+
+ ret = afr_selfheal_data_do(frame, this, fd, source, healed_sinks,
+ locked_replies);
+ if (ret)
+ goto out;
+restore_time:
+ afr_selfheal_restore_time(frame, this, fd->inode, source, healed_sinks,
+ locked_replies);
+
+ if (!is_arbiter_the_only_sink && !empty_file) {
+ ret = afr_selfheal_inodelk(frame, this, fd->inode, this->name, 0, 0,
+ data_lock);
+ if (ret < priv->child_count) {
+ ret = -ENOTCONN;
+ did_sh = _gf_false;
+ goto skip_undo_pending;
+ }
+ }
+ ret = afr_selfheal_undo_pending(
+ frame, this, fd->inode, sources, sinks, healed_sinks, undid_pending,
+ AFR_DATA_TRANSACTION, locked_replies, data_lock);
+skip_undo_pending:
+ afr_selfheal_uninodelk(frame, this, fd->inode, this->name, 0, 0, data_lock);
out:
- if (compat)
- afr_selfheal_uninodelk (frame, this, fd->inode, this->name,
- LLONG_MAX - 2, 1, compat_lock);
- if (did_sh)
- afr_log_selfheal (fd->inode->gfid, this, ret, "data", source,
- healed_sinks);
- else
- ret = 1;
+ if (did_sh)
+ afr_log_selfheal(fd->inode->gfid, this, ret, "data", source, sources,
+ healed_sinks);
+ else
+ ret = 1;
- if (locked_replies)
- afr_replies_wipe (locked_replies, priv->child_count);
+ if (locked_replies)
+ afr_replies_wipe(locked_replies, priv->child_count);
- return ret;
+ return ret;
}
-
-static fd_t *
-afr_selfheal_data_open (xlator_t *this, inode_t *inode)
+int
+afr_selfheal_data_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd,
+ dict_t *xdata)
{
- loc_t loc = {0,};
- int ret = 0;
- fd_t *fd = NULL;
+ afr_local_t *local = NULL;
+ int i = (long)cookie;
+
+ local = frame->local;
+
+ local->replies[i].valid = 1;
+ local->replies[i].op_ret = op_ret;
+ local->replies[i].op_errno = op_errno;
+
+ syncbarrier_wake(&local->barrier);
- fd = fd_create (inode, 0);
- if (!fd)
- return NULL;
+ return 0;
+}
- loc.inode = inode_ref (inode);
- gf_uuid_copy (loc.gfid, inode->gfid);
+int
+afr_selfheal_data_open(xlator_t *this, inode_t *inode, fd_t **fd)
+{
+ int ret = 0;
+ fd_t *fd_tmp = NULL;
+ loc_t loc = {
+ 0,
+ };
+ call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+
+ priv = this->private;
+
+ fd_tmp = fd_create(inode, 0);
+ if (!fd_tmp)
+ return -ENOMEM;
+
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+
+ frame = afr_frame_create(this, &ret);
+ if (!frame) {
+ ret = -ret;
+ fd_unref(fd_tmp);
+ goto out;
+ }
+ local = frame->local;
+
+ AFR_ONLIST(local->child_up, frame, afr_selfheal_data_open_cbk, open, &loc,
+ O_RDWR | O_LARGEFILE, fd_tmp, NULL);
+
+ ret = -ENOTCONN;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
+
+ if (local->replies[i].op_ret < 0) {
+ ret = -local->replies[i].op_errno;
+ continue;
+ }
- ret = syncop_open (this, &loc, O_RDWR|O_LARGEFILE, fd, NULL, NULL);
- if (ret) {
- fd_unref (fd);
- fd = NULL;
- } else {
- fd_bind (fd);
- }
+ ret = 0;
+ break;
+ }
- loc_wipe (&loc);
+ if (ret < 0) {
+ fd_unref(fd_tmp);
+ goto out;
+ } else {
+ fd_bind(fd_tmp);
+ }
- return fd;
+ *fd = fd_tmp;
+out:
+ loc_wipe(&loc);
+ if (frame)
+ AFR_STACK_DESTROY(frame);
+ return ret;
}
int
-afr_selfheal_data (call_frame_t *frame, xlator_t *this, inode_t *inode)
+afr_selfheal_data(call_frame_t *frame, xlator_t *this, fd_t *fd)
{
- afr_private_t *priv = NULL;
- unsigned char *locked_on = NULL;
- int ret = 0;
- fd_t *fd = NULL;
-
- priv = this->private;
-
- fd = afr_selfheal_data_open (this, inode);
- if (!fd) {
- gf_log (this->name, GF_LOG_DEBUG, "%s: Failed to open",
- uuid_utoa (inode->gfid));
- return -EIO;
+ afr_private_t *priv = NULL;
+ unsigned char *locked_on = NULL;
+ int ret = 0;
+ inode_t *inode = fd->inode;
+
+ priv = this->private;
+
+ locked_on = alloca0(priv->child_count);
+
+ ret = afr_selfheal_tie_breaker_inodelk(frame, this, inode, priv->sh_domain,
+ 0, 0, locked_on);
+ {
+ if (ret < priv->child_count) {
+ gf_msg_debug(this->name, 0,
+ "%s: Skipping "
+ "self-heal as only %d number of "
+ "subvolumes could be locked",
+ uuid_utoa(fd->inode->gfid), ret);
+ /* Either less than two subvols available, or another
+ selfheal (from another server) is in progress. Skip
+ for now in any case there isn't anything to do.
+ */
+ ret = -ENOTCONN;
+ goto unlock;
}
- locked_on = alloca0 (priv->child_count);
-
- ret = afr_selfheal_tryinodelk (frame, this, inode, priv->sh_domain, 0, 0,
- locked_on);
- {
- if (ret < AFR_SH_MIN_PARTICIPANTS) {
- gf_log (this->name, GF_LOG_DEBUG, "%s: Skipping "
- "self-heal as only %d number of subvolumes "
- "could be locked", uuid_utoa (fd->inode->gfid),
- ret);
- /* Either less than two subvols available, or another
- selfheal (from another server) is in progress. Skip
- for now in any case there isn't anything to do.
- */
- ret = -ENOTCONN;
- goto unlock;
- }
-
- ret = __afr_selfheal_data (frame, this, fd, locked_on);
- }
+ ret = __afr_selfheal_data(frame, this, fd, locked_on);
+ }
unlock:
- afr_selfheal_uninodelk (frame, this, inode, priv->sh_domain, 0, 0, locked_on);
-
- if (fd)
- fd_unref (fd);
+ afr_selfheal_uninodelk(frame, this, inode, priv->sh_domain, 0, 0,
+ locked_on);
- return ret;
+ return ret;
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
index cb3648c07bd..64893f441e3 100644
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -8,755 +8,1269 @@
cases as published by the Free Software Foundation.
*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "afr.h"
#include "afr-self-heal.h"
-#include "byte-order.h"
+#include <glusterfs/byte-order.h>
#include "afr-transaction.h"
+#include "afr-messages.h"
+#include <glusterfs/syncop-utils.h>
+#include <glusterfs/events.h>
-/* Max file name length is 255 this filename is of length 256. No file with
- * this name can ever come, entry-lock with this name is going to prevent
- * self-heals from older versions while the granular entry-self-heal is going
- * on in newer version.*/
-#define LONG_FILENAME "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"\
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"\
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"\
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"\
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
-
-static int
-afr_selfheal_entry_delete (xlator_t *this, inode_t *dir, const char *name,
- inode_t *inode, int child, struct afr_reply *replies)
+int
+afr_selfheal_entry_anon_inode(xlator_t *this, inode_t *dir, const char *name,
+ inode_t *inode, int child,
+ struct afr_reply *replies,
+ gf_boolean_t *anon_inode)
{
- afr_private_t *priv = NULL;
- xlator_t *subvol = NULL;
- int ret = 0;
- loc_t loc = {0, };
- char g[64];
-
- priv = this->private;
-
- subvol = priv->children[child];
-
- loc.parent = inode_ref (dir);
- gf_uuid_copy (loc.pargfid, dir->gfid);
- loc.name = name;
- loc.inode = inode_ref (inode);
-
- if (replies[child].valid && replies[child].op_ret == 0) {
- switch (replies[child].poststat.ia_type) {
- case IA_IFDIR:
- gf_log (this->name, GF_LOG_WARNING,
- "expunging dir %s/%s (%s) on %s",
- uuid_utoa (dir->gfid), name,
- uuid_utoa_r (replies[child].poststat.ia_gfid, g),
- subvol->name);
- ret = syncop_rmdir (subvol, &loc, 1, NULL, NULL);
- break;
- default:
- gf_log (this->name, GF_LOG_WARNING,
- "expunging file %s/%s (%s) on %s",
- uuid_utoa (dir->gfid), name,
- uuid_utoa_r (replies[child].poststat.ia_gfid, g),
- subvol->name);
- ret = syncop_unlink (subvol, &loc, NULL, NULL);
- break;
- }
- }
-
- loc_wipe (&loc);
-
- return ret;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ int ret = 0;
+ int i = 0;
+ char g[64] = {0};
+ unsigned char *lookup_success = NULL;
+ call_frame_t *frame = NULL;
+ loc_t loc2 = {
+ 0,
+ };
+ loc_t loc = {
+ 0,
+ };
+
+ priv = this->private;
+ subvol = priv->children[child];
+ lookup_success = alloca0(priv->child_count);
+ uuid_utoa_r(replies[child].poststat.ia_gfid, g);
+ loc.inode = inode_new(inode->table);
+ if (!loc.inode) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ if (replies[child].poststat.ia_type == IA_IFDIR) {
+ /* This directory may have sub-directory hierarchy which may need to
+ * be preserved for subsequent heals. So unconditionally move the
+ * directory to anonymous-inode directory*/
+ *anon_inode = _gf_true;
+ goto anon_inode;
+ }
+
+ frame = afr_frame_create(this, &ret);
+ if (!frame) {
+ ret = -ret;
+ goto out;
+ }
+ local = frame->local;
+ gf_uuid_copy(loc.gfid, replies[child].poststat.ia_gfid);
+ AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup, &loc,
+ NULL);
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].op_ret == 0) {
+ lookup_success[i] = 1;
+ } else if (local->replies[i].op_errno != ENOENT &&
+ local->replies[i].op_errno != ESTALE) {
+ ret = -local->replies[i].op_errno;
+ }
+ }
+
+ if (priv->quorum_count) {
+ if (afr_has_quorum(lookup_success, this, NULL)) {
+ *anon_inode = _gf_true;
+ }
+ } else if (AFR_COUNT(lookup_success, priv->child_count) > 1) {
+ *anon_inode = _gf_true;
+ } else if (ret) {
+ goto out;
+ }
+
+anon_inode:
+ if (!*anon_inode) {
+ ret = 0;
+ goto out;
+ }
+
+ loc.parent = inode_ref(dir);
+ gf_uuid_copy(loc.pargfid, dir->gfid);
+ loc.name = name;
+
+ ret = afr_anon_inode_create(this, child, &loc2.parent);
+ if (ret < 0)
+ goto out;
+
+ loc2.name = g;
+ ret = syncop_rename(subvol, &loc, &loc2, NULL, NULL);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, AFR_MSG_EXPUNGING_FILE_OR_DIR,
+ "Rename to %s dir %s/%s (%s) on %s failed",
+ priv->anon_inode_name, uuid_utoa(dir->gfid), name, g,
+ subvol->name);
+ } else {
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR,
+ "Rename to %s dir %s/%s (%s) on %s successful",
+ priv->anon_inode_name, uuid_utoa(dir->gfid), name, g,
+ subvol->name);
+ }
+
+out:
+ loc_wipe(&loc);
+ loc_wipe(&loc2);
+ if (frame) {
+ AFR_STACK_DESTROY(frame);
+ }
+
+ return ret;
}
+int
+afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name,
+ inode_t *inode, int child, struct afr_reply *replies)
+{
+ char g[64] = {0};
+ afr_private_t *priv = NULL;
+ xlator_t *subvol = NULL;
+ int ret = 0;
+ loc_t loc = {
+ 0,
+ };
+ gf_boolean_t anon_inode = _gf_false;
+
+ priv = this->private;
+ subvol = priv->children[child];
+
+ if ((!replies[child].valid) || (replies[child].op_ret < 0)) {
+ /*Nothing to do*/
+ ret = 0;
+ goto out;
+ }
+
+ if (priv->use_anon_inode) {
+ ret = afr_selfheal_entry_anon_inode(this, dir, name, inode, child,
+ replies, &anon_inode);
+ if (ret < 0 || anon_inode)
+ goto out;
+ }
+
+ loc.parent = inode_ref(dir);
+ loc.inode = inode_new(inode->table);
+ if (!loc.inode) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ loc.name = name;
+ switch (replies[child].poststat.ia_type) {
+ case IA_IFDIR:
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR,
+ "expunging dir %s/%s (%s) on %s", uuid_utoa(dir->gfid), name,
+ uuid_utoa_r(replies[child].poststat.ia_gfid, g),
+ subvol->name);
+ ret = syncop_rmdir(subvol, &loc, 1, NULL, NULL);
+ break;
+ default:
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR,
+ "expunging file %s/%s (%s) on %s", uuid_utoa(dir->gfid),
+ name, uuid_utoa_r(replies[child].poststat.ia_gfid, g),
+ subvol->name);
+ ret = syncop_unlink(subvol, &loc, NULL, NULL);
+ break;
+ }
+
+out:
+ loc_wipe(&loc);
+ return ret;
+}
int
-afr_selfheal_recreate_entry (xlator_t *this, int dst, int source, inode_t *dir,
- const char *name, inode_t *inode,
- struct afr_reply *replies,
- unsigned char *newentry)
+afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source,
+ unsigned char *sources, inode_t *dir,
+ const char *name, inode_t *inode,
+ struct afr_reply *replies)
{
- int ret = 0;
- loc_t loc = {0,};
- loc_t srcloc = {0,};
- afr_private_t *priv = NULL;
- dict_t *xdata = NULL;
- struct iatt *iatt = NULL;
- char *linkname = NULL;
- mode_t mode = 0;
- struct iatt newent = {0,};
- priv = this->private;
-
- xdata = dict_new();
- if (!xdata)
- return -ENOMEM;
-
- loc.parent = inode_ref (dir);
- gf_uuid_copy (loc.pargfid, dir->gfid);
- loc.name = name;
- loc.inode = inode_ref (inode);
-
- ret = afr_selfheal_entry_delete (this, dir, name, inode, dst, replies);
- if (ret)
- goto out;
-
- ret = dict_set_static_bin (xdata, "gfid-req",
- replies[source].poststat.ia_gfid, 16);
- if (ret)
- goto out;
-
- iatt = &replies[source].poststat;
-
- srcloc.inode = inode_ref (inode);
- gf_uuid_copy (srcloc.gfid, iatt->ia_gfid);
-
- mode = st_mode_from_ia (iatt->ia_prot, iatt->ia_type);
-
- switch (iatt->ia_type) {
- case IA_IFDIR:
- ret = syncop_mkdir (priv->children[dst], &loc, mode, 0,
- xdata, NULL);
- if (ret == 0)
- newentry[dst] = 1;
- break;
- case IA_IFLNK:
- ret = syncop_lookup (priv->children[dst], &srcloc, 0, 0, 0, 0);
- if (ret == 0) {
- ret = syncop_link (priv->children[dst], &srcloc, &loc,
- NULL, NULL);
- } else {
- ret = syncop_readlink (priv->children[source], &srcloc,
- &linkname, 4096, NULL, NULL);
- if (ret <= 0)
- goto out;
- ret = syncop_symlink (priv->children[dst], &loc,
- linkname, NULL, xdata, NULL);
- if (ret == 0)
- newentry[dst] = 1;
- }
- break;
- default:
- ret = dict_set_int32 (xdata, GLUSTERFS_INTERNAL_FOP_KEY, 1);
- if (ret)
- goto out;
- ret = syncop_mknod (priv->children[dst], &loc, mode,
- iatt->ia_rdev, &newent, xdata, NULL);
- if (ret == 0 && newent.ia_nlink == 1) {
- /* New entry created. Mark @dst pending on all sources */
- newentry[dst] = 1;
- }
- break;
- }
+ int ret = 0;
+ loc_t loc = {
+ 0,
+ };
+ loc_t srcloc = {
+ 0,
+ };
+ loc_t anonloc = {
+ 0,
+ };
+ xlator_t *this = frame->this;
+ afr_private_t *priv = NULL;
+ dict_t *xdata = NULL;
+ struct iatt *iatt = NULL;
+ char *linkname = NULL;
+ mode_t mode = 0;
+ struct iatt newent = {
+ 0,
+ };
+ unsigned char *newentry = NULL;
+ char iatt_uuid_str[64] = {0};
+ char dir_uuid_str[64] = {0};
+
+ priv = this->private;
+ iatt = &replies[source].poststat;
+ uuid_utoa_r(iatt->ia_gfid, iatt_uuid_str);
+ if (iatt->ia_type == IA_INVAL || gf_uuid_is_null(iatt->ia_gfid)) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SELF_HEAL_FAILED,
+ "Invalid ia_type (%d) or gfid(%s). source brick=%d, "
+ "pargfid=%s, name=%s",
+ iatt->ia_type, iatt_uuid_str, source,
+ uuid_utoa_r(dir->gfid, dir_uuid_str), name);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ xdata = dict_new();
+ if (!xdata)
+ return -ENOMEM;
+ newentry = alloca0(priv->child_count);
+ loc.parent = inode_ref(dir);
+ gf_uuid_copy(loc.pargfid, dir->gfid);
+ loc.name = name;
+ loc.inode = inode_ref(inode);
+
+ ret = afr_selfheal_entry_delete(this, dir, name, inode, dst, replies);
+ if (ret)
+ goto out;
+
+ ret = dict_set_gfuuid(xdata, "gfid-req", replies[source].poststat.ia_gfid,
+ true);
+ if (ret)
+ goto out;
+
+ srcloc.inode = inode_ref(inode);
+ gf_uuid_copy(srcloc.gfid, iatt->ia_gfid);
+ ret = syncop_lookup(priv->children[dst], &srcloc, 0, 0, 0, 0);
+ if (ret == -ENOENT || ret == -ESTALE) {
+ newentry[dst] = 1;
+ ret = afr_selfheal_newentry_mark(frame, this, inode, source, replies,
+ sources, newentry);
+ if (ret)
+ goto out;
+ } else if (ret == 0 && iatt->ia_type == IA_IFDIR && priv->use_anon_inode) {
+ // Try rename from hidden directory
+ ret = afr_anon_inode_create(this, dst, &anonloc.parent);
+ if (ret < 0)
+ goto out;
+ anonloc.inode = inode_ref(inode);
+ anonloc.name = iatt_uuid_str;
+ ret = syncop_rename(priv->children[dst], &anonloc, &loc, NULL, NULL);
+ if (ret == -ENOENT || ret == -ESTALE)
+ ret = -1; /*This sets 'mismatch' to true*/
+ goto out;
+ }
+
+ mode = st_mode_from_ia(iatt->ia_prot, iatt->ia_type);
+
+ switch (iatt->ia_type) {
+ case IA_IFDIR:
+ ret = syncop_mkdir(priv->children[dst], &loc, mode, 0, xdata, NULL);
+ break;
+ case IA_IFLNK:
+ if (!newentry[dst]) {
+ ret = syncop_link(priv->children[dst], &srcloc, &loc, &newent,
+ NULL, NULL);
+ } else {
+ ret = syncop_readlink(priv->children[source], &srcloc,
+ &linkname, 4096, NULL, NULL);
+ if (ret <= 0)
+ goto out;
+ ret = syncop_symlink(priv->children[dst], &loc, linkname, NULL,
+ xdata, NULL);
+ }
+ break;
+ default:
+ ret = dict_set_int32_sizen(xdata, GLUSTERFS_INTERNAL_FOP_KEY, 1);
+ if (ret)
+ goto out;
+ ret = syncop_mknod(
+ priv->children[dst], &loc, mode,
+ makedev(ia_major(iatt->ia_rdev), ia_minor(iatt->ia_rdev)),
+ &newent, xdata, NULL);
+ break;
+ }
out:
- if (xdata)
- dict_unref (xdata);
- GF_FREE (linkname);
- loc_wipe (&loc);
- loc_wipe (&srcloc);
- return ret;
+ if (xdata)
+ dict_unref(xdata);
+ GF_FREE(linkname);
+ loc_wipe(&loc);
+ loc_wipe(&srcloc);
+ loc_wipe(&anonloc);
+ return ret;
}
+static int
+__afr_selfheal_heal_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ char *name, inode_t *inode, int source,
+ unsigned char *sources, unsigned char *healed_sinks,
+ unsigned char *locked_on, struct afr_reply *replies)
+{
+ int ret = 0;
+ afr_private_t *priv = NULL;
+ int i = 0;
+
+ priv = this->private;
+
+ if (!replies[source].valid)
+ return -EIO;
+
+ /* Skip healing this entry if the last lookup on it failed for reasons
+ * other than ENOENT.
+ */
+ if ((replies[source].op_ret < 0) && (replies[source].op_errno != ENOENT))
+ return -replies[source].op_errno;
+
+ if (replies[source].op_ret == 0) {
+ ret = afr_lookup_and_heal_gfid(this, fd->inode, name, inode, replies,
+ source, sources,
+ &replies[source].poststat.ia_gfid, NULL);
+ if (ret)
+ return ret;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!healed_sinks[i])
+ continue;
+ if (replies[source].op_ret == -1 &&
+ replies[source].op_errno == ENOENT) {
+ ret = afr_selfheal_entry_delete(this, fd->inode, name, inode, i,
+ replies);
+ } else {
+ if (!gf_uuid_compare(replies[i].poststat.ia_gfid,
+ replies[source].poststat.ia_gfid))
+ continue;
+
+ ret = afr_selfheal_recreate_entry(frame, i, source, sources,
+ fd->inode, name, inode, replies);
+ }
+ if (ret < 0)
+ break;
+ }
+
+ return ret;
+}
static int
-__afr_selfheal_heal_dirent (call_frame_t *frame, xlator_t *this, fd_t *fd,
- char *name, inode_t *inode, int source,
- unsigned char *sources, unsigned char *healed_sinks,
- unsigned char *locked_on, struct afr_reply *replies)
+afr_selfheal_detect_gfid_and_type_mismatch(xlator_t *this,
+ struct afr_reply *replies,
+ inode_t *inode, uuid_t pargfid,
+ char *bname, int src_idx,
+ unsigned char *locked_on, int *src)
{
- int ret = 0;
- afr_private_t *priv = NULL;
- int i = 0;
- unsigned char *newentry = NULL;
-
- priv = this->private;
-
- newentry = alloca0 (priv->child_count);
-
- if (!replies[source].valid)
- return -EIO;
-
- for (i = 0; i < priv->child_count; i++) {
- if (!healed_sinks[i])
- continue;
- if (replies[source].op_ret == -1 &&
- replies[source].op_errno == ENOENT) {
- ret = afr_selfheal_entry_delete (this, fd->inode, name,
- inode, i, replies);
- } else {
- if (!gf_uuid_compare (replies[i].poststat.ia_gfid,
- replies[source].poststat.ia_gfid))
- continue;
-
- ret = afr_selfheal_recreate_entry (this, i, source,
- fd->inode, name, inode,
- replies, newentry);
- }
- if (ret < 0)
- break;
- }
-
- if (AFR_COUNT (newentry, priv->child_count))
- afr_selfheal_newentry_mark (frame, this, inode, source, replies,
- sources, newentry);
- return ret;
+ int i = 0;
+ int ret = -1;
+ afr_private_t *priv = NULL;
+ void *gfid = NULL;
+ ia_type_t ia_type = IA_INVAL;
+
+ priv = this->private;
+ gfid = &replies[src_idx].poststat.ia_gfid;
+ ia_type = replies[src_idx].poststat.ia_type;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (i == src_idx)
+ continue;
+
+ if (!replies[i].valid)
+ continue;
+
+ if (replies[i].op_ret != 0)
+ continue;
+
+ if (gf_uuid_is_null(replies[i].poststat.ia_gfid))
+ continue;
+
+ if (replies[i].poststat.ia_type == IA_INVAL)
+ continue;
+
+ if (ia_type == IA_INVAL || gf_uuid_is_null(gfid)) {
+ src_idx = i;
+ ia_type = replies[src_idx].poststat.ia_type;
+ gfid = &replies[src_idx].poststat.ia_gfid;
+ continue;
+ }
+
+ if (gf_uuid_compare(gfid, replies[i].poststat.ia_gfid) &&
+ (ia_type == replies[i].poststat.ia_type)) {
+ ret = afr_gfid_split_brain_source(this, replies, inode, pargfid,
+ bname, src_idx, i, locked_on, src,
+ NULL);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN,
+ "Skipping conservative merge on the "
+ "file.");
+ return ret;
+ }
+
+ if (ia_type != replies[i].poststat.ia_type) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN,
+ "Type mismatch detected "
+ "for <gfid:%s>/%s>, %s on %s and %s on %s. "
+ "Skipping conservative merge on the file.",
+ uuid_utoa(pargfid), bname,
+ gf_inode_type_to_str(replies[i].poststat.ia_type),
+ priv->children[i]->name,
+ gf_inode_type_to_str(replies[src_idx].poststat.ia_type),
+ priv->children[src_idx]->name);
+ gf_event(EVENT_AFR_SPLIT_BRAIN,
+ "client-pid=%d;"
+ "subvol=%s;type=file;"
+ "file=<gfid:%s>/%s>;count=2;child-%d=%s;type-"
+ "%d=%s;child-%d=%s;type-%d=%s",
+ this->ctx->cmd_args.client_pid, this->name,
+ uuid_utoa(pargfid), bname, i, priv->children[i]->name, i,
+ gf_inode_type_to_str(replies[i].poststat.ia_type), src_idx,
+ priv->children[src_idx]->name, src_idx,
+ gf_inode_type_to_str(replies[src_idx].poststat.ia_type));
+ return -1;
+ }
+ }
+
+ return 0;
}
static int
-afr_selfheal_detect_gfid_and_type_mismatch (xlator_t *this,
- struct afr_reply *replies,
- uuid_t pargfid, char *bname,
- int src_idx)
+__afr_selfheal_merge_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ char *name, inode_t *inode, unsigned char *sources,
+ unsigned char *healed_sinks,
+ unsigned char *locked_on, struct afr_reply *replies)
{
- int i = 0;
- char g1[64] = {0,};
- char g2[64] = {0,};
- afr_private_t *priv = NULL;
+ int ret = 0;
+ int i = 0;
+ int source = -1;
+ int src = -1;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (replies[i].valid && replies[i].op_ret == 0) {
+ source = i;
+ break;
+ }
+ }
- priv = this->private;
+ if (source == -1) {
+ /* entry got deleted in the mean time? */
+ return 0;
+ }
+ /* Set all the sources as 1, otheriwse newentry_mark won't be set */
+ for (i = 0; i < priv->child_count; i++) {
+ if (replies[i].valid && replies[i].op_ret == 0) {
+ sources[i] = 1;
+ }
+ }
+
+ ret = afr_lookup_and_heal_gfid(this, fd->inode, name, inode, replies,
+ source, sources,
+ &replies[source].poststat.ia_gfid, NULL);
+ if (ret)
+ return ret;
+
+ /* In case of type mismatch / unable to resolve gfid mismatch on the
+ * entry, return -1.*/
+ ret = afr_selfheal_detect_gfid_and_type_mismatch(
+ this, replies, inode, fd->inode->gfid, name, source, locked_on, &src);
+
+ if (ret < 0)
+ return ret;
+ if (src != -1) {
+ source = src;
for (i = 0; i < priv->child_count; i++) {
- if (i == src_idx)
- continue;
-
- if (!replies[i].valid)
- continue;
-
- if (replies[i].op_ret != 0)
- continue;
-
- if (gf_uuid_compare (replies[src_idx].poststat.ia_gfid,
- replies[i].poststat.ia_gfid)) {
- gf_log (this->name, GF_LOG_ERROR, "Gfid mismatch "
- "detected for <%s/%s>, %s on %s and %s on %s. "
- "Skipping conservative merge on the file.",
- uuid_utoa (pargfid), bname,
- uuid_utoa_r (replies[i].poststat.ia_gfid, g1),
- priv->children[i]->name,
- uuid_utoa_r (replies[src_idx].poststat.ia_gfid,
- g2), priv->children[src_idx]->name);
- return -1;
- }
-
- if ((replies[src_idx].poststat.ia_type) !=
- (replies[i].poststat.ia_type)) {
- gf_log (this->name, GF_LOG_ERROR, "Type mismatch "
- "detected for <%s/%s>, %d on %s and %d on %s. "
- "Skipping conservative merge on the file.",
- uuid_utoa (pargfid), bname,
- replies[i].poststat.ia_type,
- priv->children[i]->name,
- replies[src_idx].poststat.ia_type,
- priv->children[src_idx]->name);
- return -1;
- }
+ if (i != src && replies[i].valid &&
+ gf_uuid_compare(replies[src].poststat.ia_gfid,
+ replies[i].poststat.ia_gfid)) {
+ sources[i] = 0;
+ }
+ }
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (i == source || !healed_sinks[i])
+ continue;
+
+ if (src != -1) {
+ if (!gf_uuid_compare(replies[src].poststat.ia_gfid,
+ replies[i].poststat.ia_gfid))
+ continue;
+ } else if (replies[i].op_errno != ENOENT) {
+ continue;
}
- return 0;
+ ret |= afr_selfheal_recreate_entry(frame, i, source, sources, fd->inode,
+ name, inode, replies);
+ }
+
+ return ret;
}
static int
-__afr_selfheal_merge_dirent (call_frame_t *frame, xlator_t *this, fd_t *fd,
- char *name, inode_t *inode, unsigned char *sources,
- unsigned char *healed_sinks, unsigned char *locked_on,
- struct afr_reply *replies)
+__afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ char *name, inode_t *inode, int source,
+ unsigned char *sources, unsigned char *healed_sinks,
+ unsigned char *locked_on, struct afr_reply *replies)
{
- int ret = 0;
- int i = 0;
- int source = -1;
- unsigned char *newentry = NULL;
- afr_private_t *priv = NULL;
+ int ret = -1;
+
+ if (source < 0)
+ ret = __afr_selfheal_merge_dirent(frame, this, fd, name, inode, sources,
+ healed_sinks, locked_on, replies);
+ else
+ ret = __afr_selfheal_heal_dirent(frame, this, fd, name, inode, source,
+ sources, healed_sinks, locked_on,
+ replies);
+ return ret;
+}
- priv = this->private;
+static gf_boolean_t
+is_full_heal_marker_present(xlator_t *this, dict_t *xdata, int idx)
+{
+ int i = 0;
+ int pending[3] = {
+ 0,
+ };
+ void *pending_raw = NULL;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+
+ if (!xdata)
+ return _gf_false;
+
+ /* Iterate over each of the priv->pending_keys[] elements and then
+ * see if any of them have data segment non-zero. If they do, return
+ * true. Else return false.
+ */
+ for (i = 0; i < priv->child_count; i++) {
+ if (dict_get_ptr(xdata, priv->pending_key[i], &pending_raw))
+ continue;
+
+ if (!pending_raw)
+ continue;
+
+ memcpy(pending, pending_raw, sizeof(pending));
+ if (ntoh32(pending[idx]))
+ return _gf_true;
+ }
+
+ return _gf_false;
+}
- newentry = alloca0 (priv->child_count);
+static gf_boolean_t
+afr_need_full_heal(xlator_t *this, struct afr_reply *replies, int source,
+ unsigned char *healed_sinks, afr_transaction_type type)
+{
+ int i = 0;
+ int idx = 0;
+ afr_private_t *priv = NULL;
- for (i = 0; i < priv->child_count; i++) {
- if (replies[i].valid && replies[i].op_ret == 0) {
- source = i;
- break;
- }
- }
+ priv = this->private;
- if (source == -1) {
- /* entry got deleted in the mean time? */
- return 0;
- }
+ if (!priv->esh_granular)
+ return _gf_true;
- /* In case of a gfid or type mismatch on the entry, return -1.*/
- ret = afr_selfheal_detect_gfid_and_type_mismatch (this, replies,
- fd->inode->gfid,
- name, source);
+ if (type != AFR_ENTRY_TRANSACTION)
+ return _gf_true;
- if (ret < 0)
- return ret;
+ priv = this->private;
+ idx = afr_index_for_transaction_type(AFR_DATA_TRANSACTION);
- for (i = 0; i < priv->child_count; i++) {
- if (i == source || !healed_sinks[i])
- continue;
+ /* If there is a clear source, check whether the full-heal-indicator
+ * is present in its xdata. Otherwise, we need to examine all the
+ * participating bricks and then figure if *even* one of them has a
+ * full-heal-indicator.
+ */
- if (replies[i].op_errno != ENOENT)
- continue;
+ if (source != -1) {
+ if (is_full_heal_marker_present(this, replies[source].xdata, idx))
+ return _gf_true;
+ }
- ret = afr_selfheal_recreate_entry (this, i, source, fd->inode,
- name, inode, replies,
- newentry);
- }
+ /* else ..*/
- if (AFR_COUNT (newentry, priv->child_count))
- afr_selfheal_newentry_mark (frame, this, inode, source, replies,
- sources, newentry);
- return ret;
-}
+ for (i = 0; i < priv->child_count; i++) {
+ if (!healed_sinks[i])
+ continue;
+ if (is_full_heal_marker_present(this, replies[i].xdata, idx))
+ return _gf_true;
+ }
-static int
-__afr_selfheal_entry_dirent (call_frame_t *frame, xlator_t *this, fd_t *fd,
- char *name, inode_t *inode, int source,
- unsigned char *sources, unsigned char *healed_sinks,
- unsigned char *locked_on,
- struct afr_reply *replies)
-{
- int ret = -1;
-
- if (source < 0)
- ret = __afr_selfheal_merge_dirent (frame, this, fd, name, inode,
- sources, healed_sinks,
- locked_on, replies);
- else
- ret = __afr_selfheal_heal_dirent (frame, this, fd, name, inode,
- source, sources, healed_sinks,
- locked_on, replies);
- return ret;
+ return _gf_false;
}
static int
-__afr_selfheal_entry_finalize_source (xlator_t *this, unsigned char *sources,
- unsigned char *healed_sinks,
- unsigned char *locked_on,
- struct afr_reply *replies,
- uint64_t *witness)
+__afr_selfheal_entry_finalize_source(xlator_t *this, unsigned char *sources,
+ unsigned char *healed_sinks,
+ unsigned char *locked_on,
+ struct afr_reply *replies,
+ uint64_t *witness)
{
- int i = 0;
- afr_private_t *priv = NULL;
- int source = -1;
- int sources_count = 0;
+ afr_private_t *priv = NULL;
+ int source = -1;
+ int sources_count = 0;
+ int i = 0;
- priv = this->private;
+ priv = this->private;
- sources_count = AFR_COUNT (sources, priv->child_count);
+ sources_count = AFR_COUNT(sources, priv->child_count);
- if ((AFR_CMP (locked_on, healed_sinks, priv->child_count) == 0)
- || !sources_count || afr_does_witness_exist (this, witness)) {
+ if ((AFR_CMP(locked_on, healed_sinks, priv->child_count) == 0) ||
+ !sources_count || afr_does_witness_exist(this, witness)) {
+ memset(sources, 0, sizeof(*sources) * priv->child_count);
+ afr_mark_active_sinks(this, sources, locked_on, healed_sinks);
+ return -1;
+ }
- memset (sources, 0, sizeof (*sources) * priv->child_count);
- afr_mark_active_sinks (this, sources, locked_on, healed_sinks);
- return -1;
- }
+ source = afr_choose_source_by_policy(priv, sources, AFR_ENTRY_TRANSACTION);
- for (i = 0; i < priv->child_count; i++) {
- if (sources[i]) {
- source = i;
- break;
- }
- }
+ /*If the selected source does not blame any other brick, then mark
+ * everything as sink to trigger conservative merge.
+ */
+ if (source != -1 && !AFR_COUNT(healed_sinks, priv->child_count)) {
+ for (i = 0; i < priv->child_count; i++) {
+ if (locked_on[i]) {
+ sources[i] = 0;
+ healed_sinks[i] = 1;
+ }
+ }
+ return -1;
+ }
- return source;
+ return source;
}
int
-__afr_selfheal_entry_prepare (call_frame_t *frame, xlator_t *this,
- inode_t *inode, unsigned char *locked_on,
- unsigned char *sources, unsigned char *sinks,
- unsigned char *healed_sinks,
- struct afr_reply *replies, int *source_p)
+__afr_selfheal_entry_prepare(call_frame_t *frame, xlator_t *this,
+ inode_t *inode, unsigned char *locked_on,
+ unsigned char *sources, unsigned char *sinks,
+ unsigned char *healed_sinks,
+ struct afr_reply *replies, int *source_p,
+ unsigned char *pflag)
{
- int ret = -1;
- int source = -1;
- afr_private_t *priv = NULL;
- uint64_t *witness = NULL;
-
- priv = this->private;
-
- ret = afr_selfheal_unlocked_discover (frame, inode, inode->gfid,
- replies);
- if (ret)
- return ret;
-
- witness = alloca0 (sizeof (*witness) * priv->child_count);
- ret = afr_selfheal_find_direction (frame, this, replies,
- AFR_ENTRY_TRANSACTION,
- locked_on, sources, sinks, witness);
- if (ret)
- return ret;
-
- /* Initialize the healed_sinks[] array optimistically to
- the intersection of to-be-healed (i.e sinks[]) and
- the list of servers which are up (i.e locked_on[]).
-
- As we encounter failures in the healing process, we
- will unmark the respective servers in the healed_sinks[]
- array.
- */
- AFR_INTERSECT (healed_sinks, sinks, locked_on, priv->child_count);
-
- source = __afr_selfheal_entry_finalize_source (this, sources,
- healed_sinks,
- locked_on, replies,
- witness);
-
- if (source < 0) {
- /* If source is < 0 (typically split-brain), we perform a
- conservative merge of entries rather than erroring out */
- }
- *source_p = source;
-
- return ret;
+ int ret = -1;
+ int source = -1;
+ afr_private_t *priv = NULL;
+ uint64_t *witness = NULL;
+
+ priv = this->private;
+
+ ret = afr_selfheal_unlocked_discover(frame, inode, inode->gfid, replies);
+ if (ret)
+ return ret;
+
+ witness = alloca0(sizeof(*witness) * priv->child_count);
+ ret = afr_selfheal_find_direction(frame, this, replies,
+ AFR_ENTRY_TRANSACTION, locked_on, sources,
+ sinks, witness, pflag);
+ if (ret)
+ return ret;
+
+ /* Initialize the healed_sinks[] array optimistically to
+ the intersection of to-be-healed (i.e sinks[]) and
+ the list of servers which are up (i.e locked_on[]).
+
+ As we encounter failures in the healing process, we
+ will unmark the respective servers in the healed_sinks[]
+ array.
+ */
+ AFR_INTERSECT(healed_sinks, sinks, locked_on, priv->child_count);
+
+ source = __afr_selfheal_entry_finalize_source(this, sources, healed_sinks,
+ locked_on, replies, witness);
+
+ if (source < 0) {
+ /* If source is < 0 (typically split-brain), we perform a
+ conservative merge of entries rather than erroring out */
+ }
+ *source_p = source;
+
+ return ret;
}
static int
-afr_selfheal_entry_dirent (call_frame_t *frame, xlator_t *this,
- fd_t *fd, char *name)
+afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ char *name, inode_t *parent_idx_inode,
+ xlator_t *subvol, gf_boolean_t full_crawl)
{
- int ret = 0;
- int source = -1;
- unsigned char *locked_on = NULL;
- unsigned char *sources = NULL;
- unsigned char *sinks = NULL;
- unsigned char *healed_sinks = NULL;
- inode_t *inode = NULL;
- struct afr_reply *replies = NULL;
- struct afr_reply *par_replies = NULL;
- afr_private_t *priv = NULL;
-
- priv = this->private;
-
- sources = alloca0 (priv->child_count);
- sinks = alloca0 (priv->child_count);
- healed_sinks = alloca0 (priv->child_count);
- locked_on = alloca0 (priv->child_count);
-
- replies = alloca0 (priv->child_count * sizeof(*replies));
- par_replies = alloca0 (priv->child_count * sizeof(*par_replies));
-
- ret = afr_selfheal_entrylk (frame, this, fd->inode, this->name, NULL,
- locked_on);
- {
- if (ret < AFR_SH_MIN_PARTICIPANTS) {
- gf_log (this->name, GF_LOG_DEBUG, "%s: Skipping "
- "entry self-heal as only %d sub-volumes could "
- "be locked in %s domain",
- uuid_utoa (fd->inode->gfid), ret, this->name);
- ret = -ENOTCONN;
- goto unlock;
- }
-
- ret = __afr_selfheal_entry_prepare (frame, this, fd->inode,
- locked_on,
- sources, sinks,
- healed_sinks, par_replies,
- &source);
- if (ret < 0)
- goto unlock;
-
- inode = afr_selfheal_unlocked_lookup_on (frame, fd->inode, name,
- replies, locked_on,
- NULL);
- if (!inode) {
- ret = -ENOMEM;
- goto unlock;
- }
-
- ret = __afr_selfheal_entry_dirent (frame, this, fd, name, inode,
- source, sources, healed_sinks,
- locked_on, replies);
- }
+ int ret = 0;
+ int source = -1;
+ unsigned char *locked_on = NULL;
+ unsigned char *sources = NULL;
+ unsigned char *sinks = NULL;
+ unsigned char *healed_sinks = NULL;
+ inode_t *inode = NULL;
+ struct afr_reply *replies = NULL;
+ struct afr_reply *par_replies = NULL;
+ afr_private_t *priv = NULL;
+ dict_t *xattr = NULL;
+
+ priv = this->private;
+
+ if (afr_is_private_directory(priv, fd->inode->gfid, name,
+ GF_CLIENT_PID_SELF_HEALD)) {
+ return 0;
+ }
+
+ xattr = dict_new();
+ if (!xattr)
+ return -ENOMEM;
+ ret = dict_set_int32_sizen(xattr, GF_GFIDLESS_LOOKUP, 1);
+ if (ret) {
+ dict_unref(xattr);
+ return -1;
+ }
+
+ sources = alloca0(priv->child_count);
+ sinks = alloca0(priv->child_count);
+ healed_sinks = alloca0(priv->child_count);
+ locked_on = alloca0(priv->child_count);
+
+ replies = alloca0(priv->child_count * sizeof(*replies));
+ par_replies = alloca0(priv->child_count * sizeof(*par_replies));
+
+ ret = afr_selfheal_entrylk(frame, this, fd->inode, this->name, NULL,
+ locked_on);
+ {
+ if (ret < priv->child_count) {
+ gf_msg_debug(this->name, 0,
+ "%s: Skipping "
+ "entry self-heal as only %d sub-volumes "
+ " could be locked in %s domain",
+ uuid_utoa(fd->inode->gfid), ret, this->name);
+ ret = -ENOTCONN;
+ goto unlock;
+ }
+
+ ret = __afr_selfheal_entry_prepare(frame, this, fd->inode, locked_on,
+ sources, sinks, healed_sinks,
+ par_replies, &source, NULL);
+ if (ret < 0)
+ goto unlock;
+
+ inode = afr_selfheal_unlocked_lookup_on(frame, fd->inode, name, replies,
+ locked_on, xattr);
+ if (!inode) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
+
+ ret = __afr_selfheal_entry_dirent(frame, this, fd, name, inode, source,
+ sources, healed_sinks, locked_on,
+ replies);
+
+ if ((ret == 0) && (priv->esh_granular) && parent_idx_inode) {
+ ret = afr_shd_entry_purge(subvol, parent_idx_inode, name,
+ inode->ia_type);
+ /* Why is ret force-set to 0? We do not care about
+ * index purge failing for full heal as it is quite
+ * possible during replace-brick that not all files
+ * and directories have their name indices present in
+ * entry-changes/.
+ */
+ ret = 0;
+ }
+ }
+
unlock:
- afr_selfheal_unentrylk (frame, this, fd->inode, this->name, NULL,
- locked_on);
- if (inode)
- inode_unref (inode);
- if (replies)
- afr_replies_wipe (replies, priv->child_count);
- if (par_replies)
- afr_replies_wipe (par_replies, priv->child_count);
-
- return ret;
+ afr_selfheal_unentrylk(frame, this, fd->inode, this->name, NULL, locked_on,
+ NULL);
+ if (inode)
+ inode_unref(inode);
+ if (replies)
+ afr_replies_wipe(replies, priv->child_count);
+ if (par_replies)
+ afr_replies_wipe(par_replies, priv->child_count);
+ if (xattr)
+ dict_unref(xattr);
+
+ return ret;
}
+static inode_t *
+afr_shd_entry_changes_index_inode(xlator_t *this, xlator_t *subvol,
+ uuid_t pargfid)
+{
+ int ret = -1;
+ void *index_gfid = NULL;
+ loc_t rootloc = {
+ 0,
+ };
+ loc_t loc = {
+ 0,
+ };
+ dict_t *xattr = NULL;
+ inode_t *inode = NULL;
+ struct iatt iatt = {
+ 0,
+ };
+
+ rootloc.inode = inode_ref(this->itable->root);
+ gf_uuid_copy(rootloc.gfid, rootloc.inode->gfid);
+
+ ret = syncop_getxattr(subvol, &rootloc, &xattr,
+ GF_XATTROP_ENTRY_CHANGES_GFID, NULL, NULL);
+ if (ret || !xattr) {
+ errno = -ret;
+ goto out;
+ }
+
+ ret = dict_get_ptr(xattr, GF_XATTROP_ENTRY_CHANGES_GFID, &index_gfid);
+ if (ret) {
+ errno = EINVAL;
+ goto out;
+ }
+
+ loc.inode = inode_new(this->itable);
+ if (!loc.inode) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ gf_uuid_copy(loc.pargfid, index_gfid);
+ loc.name = gf_strdup(uuid_utoa(pargfid));
+
+ ret = syncop_lookup(subvol, &loc, &iatt, NULL, NULL, NULL);
+ if (ret < 0) {
+ errno = -ret;
+ goto out;
+ }
+
+ inode = inode_link(loc.inode, NULL, NULL, &iatt);
+
+out:
+ if (xattr)
+ dict_unref(xattr);
+ loc_wipe(&rootloc);
+ GF_FREE((char *)loc.name);
+ loc_wipe(&loc);
+
+ return inode;
+}
+
+static int
+afr_selfheal_entry_do_subvol(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int child)
+{
+ int ret = 0;
+ gf_dirent_t entries;
+ gf_dirent_t *entry = NULL;
+ off_t offset = 0;
+ call_frame_t *iter_frame = NULL;
+ xlator_t *subvol = NULL;
+ afr_private_t *priv = NULL;
+ gf_boolean_t mismatch = _gf_false;
+ afr_local_t *local = NULL;
+ loc_t loc = {
+ 0,
+ };
+
+ priv = this->private;
+ subvol = priv->children[child];
+
+ INIT_LIST_HEAD(&entries.list);
+
+ local = frame->local;
+
+ iter_frame = afr_copy_frame(frame);
+ if (!iter_frame)
+ return -ENOMEM;
+
+ loc.inode = afr_shd_entry_changes_index_inode(this, subvol,
+ fd->inode->gfid);
+
+ while ((ret = syncop_readdir(subvol, fd, 131072, offset, &entries, NULL,
+ NULL))) {
+ if (ret > 0)
+ ret = 0;
+ list_for_each_entry(entry, &entries.list, list)
+ {
+ offset = entry->d_off;
+
+ if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
+ continue;
+
+ ret = afr_selfheal_entry_dirent(iter_frame, this, fd, entry->d_name,
+ loc.inode, subvol,
+ local->need_full_crawl);
+ AFR_STACK_RESET(iter_frame);
+ if (iter_frame->local == NULL) {
+ ret = -ENOTCONN;
+ break;
+ }
+
+ if (ret == -1) {
+ /* gfid or type mismatch. */
+ mismatch = _gf_true;
+ ret = 0;
+ }
+ if (ret)
+ break;
+ }
+
+ gf_dirent_free(&entries);
+ if (ret)
+ break;
+ }
+
+ loc_wipe(&loc);
+
+ AFR_STACK_DESTROY(iter_frame);
+ if (mismatch == _gf_true)
+ /* undo pending will be skipped */
+ ret = -1;
+ return ret;
+}
static int
-afr_selfheal_entry_do_subvol (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int child)
+afr_selfheal_entry_granular_dirent(xlator_t *subvol, gf_dirent_t *entry,
+ loc_t *parent, void *data)
{
- int ret = 0;
- gf_dirent_t entries;
- gf_dirent_t *entry = NULL;
- off_t offset = 0;
- call_frame_t *iter_frame = NULL;
- xlator_t *subvol = NULL;
- afr_private_t *priv = NULL;
-
- priv = this->private;
- subvol = priv->children[child];
-
- INIT_LIST_HEAD (&entries.list);
-
- iter_frame = afr_copy_frame (frame);
- if (!iter_frame)
- return -ENOMEM;
-
- while ((ret = syncop_readdir (subvol, fd, 131072, offset, &entries,
- NULL, NULL))) {
- if (ret > 0)
- ret = 0;
- list_for_each_entry (entry, &entries.list, list) {
- offset = entry->d_off;
-
- if (!strcmp (entry->d_name, ".") ||
- !strcmp (entry->d_name, ".."))
- continue;
-
- if (__is_root_gfid (fd->inode->gfid) &&
- !strcmp (entry->d_name, GF_REPLICATE_TRASH_DIR))
- continue;
-
- ret = afr_selfheal_entry_dirent (iter_frame, this, fd,
- entry->d_name);
- AFR_STACK_RESET (iter_frame);
-
- if (ret)
- break;
- }
-
- gf_dirent_free (&entries);
- if (ret)
- break;
- }
-
- AFR_STACK_DESTROY (iter_frame);
- return ret;
+ int ret = 0;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ afr_granular_esh_args_t *args = data;
+
+ /* Look up the actual inode associated with entry. If the lookup returns
+ * ESTALE or ENOENT, then it means we have a stale index. Remove it.
+ * This is analogous to the check in afr_shd_index_heal() except that
+ * here it is achieved through LOOKUP and in afr_shd_index_heal() through
+ * a GETXATTR.
+ */
+
+ loc.inode = inode_new(args->xl->itable);
+ loc.parent = inode_ref(args->heal_fd->inode);
+ gf_uuid_copy(loc.pargfid, loc.parent->gfid);
+ loc.name = entry->d_name;
+
+ ret = syncop_lookup(args->xl, &loc, &iatt, NULL, NULL, NULL);
+ if ((ret == -ENOENT) || (ret == -ESTALE)) {
+ /* The name indices under the pgfid index dir are guaranteed
+ * to be regular files. Hence the hardcoding.
+ */
+ afr_shd_entry_purge(subvol, parent->inode, entry->d_name, IA_IFREG);
+ ret = 0;
+ goto out;
+ }
+ /* TBD: afr_shd_zero_xattrop? */
+
+ ret = afr_selfheal_entry_dirent(args->frame, args->xl, args->heal_fd,
+ entry->d_name, parent->inode, subvol,
+ _gf_false);
+ AFR_STACK_RESET(args->frame);
+ if (args->frame->local == NULL)
+ ret = -ENOTCONN;
+
+ if (ret == -1)
+ args->mismatch = _gf_true;
+
+out:
+ loc_wipe(&loc);
+ return 0;
}
static int
-afr_selfheal_entry_do (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int source, unsigned char *sources,
- unsigned char *healed_sinks)
+afr_selfheal_entry_granular(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int subvol_idx, gf_boolean_t is_src)
{
- int i = 0;
- afr_private_t *priv = NULL;
- int ret = 0;
-
- priv = this->private;
-
- gf_log (this->name, GF_LOG_INFO, "performing entry selfheal on %s",
- uuid_utoa (fd->inode->gfid));
-
- for (i = 0; i < priv->child_count; i++) {
- if (i != source && !healed_sinks[i])
- continue;
- ret = afr_selfheal_entry_do_subvol (frame, this, fd, i);
- if (ret)
- break;
- }
- return ret;
+ int ret = 0;
+ loc_t loc = {
+ 0,
+ };
+ xlator_t *subvol = NULL;
+ afr_private_t *priv = NULL;
+ afr_granular_esh_args_t args = {
+ 0,
+ };
+
+ priv = this->private;
+ subvol = priv->children[subvol_idx];
+
+ args.frame = afr_copy_frame(frame);
+ if (!args.frame)
+ goto out;
+ args.xl = this;
+ /* args.heal_fd represents the fd associated with the original directory
+ * on which entry heal is being attempted.
+ */
+ args.heal_fd = fd;
+
+ /* @subvol here represents the subvolume of AFR where
+ * indices/entry-changes/<pargfid> will be processed
+ */
+ loc.inode = afr_shd_entry_changes_index_inode(this, subvol,
+ fd->inode->gfid);
+ if (!loc.inode) {
+ /* If granular heal failed on the sink (as it might sometimes
+ * because it is the src that would mostly contain the granular
+ * changelogs and the sink's entry-changes would be empty),
+ * do not treat heal as failure.
+ */
+ if (is_src)
+ ret = -errno;
+ else
+ ret = 0;
+ goto out;
+ }
+
+ ret = syncop_dir_scan(subvol, &loc, GF_CLIENT_PID_SELF_HEALD, &args,
+ afr_selfheal_entry_granular_dirent);
+
+ loc_wipe(&loc);
+
+ if (args.mismatch == _gf_true)
+ ret = -1;
+out:
+ if (args.frame)
+ AFR_STACK_DESTROY(args.frame);
+ return ret;
}
+static int
+afr_selfheal_entry_do(call_frame_t *frame, xlator_t *this, fd_t *fd, int source,
+ unsigned char *sources, unsigned char *healed_sinks)
+{
+ int i = 0;
+ int ret = 0;
+ gf_boolean_t mismatch = _gf_false;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_SELF_HEAL_INFO,
+ "performing entry selfheal on %s", uuid_utoa(fd->inode->gfid));
+
+ for (i = 0; i < priv->child_count; i++) {
+ /* Expunge */
+ if (!healed_sinks[i])
+ continue;
+
+ if (!local->need_full_crawl)
+ /* Why call afr_selfheal_entry_granular() on a "healed sink",
+ * given that it is the source that contains the granular
+ * indices?
+ * If the index for this directory is non-existent or empty on
+ * this subvol (=> clear sink), then it will return early
+ * without failure status.
+ * If the index is non-empty and it is yet a 'healed sink', then
+ * it is due to a split-brain in which case we anyway need to
+ * crawl the indices/entry-changes/pargfid directory.
+ */
+ ret = afr_selfheal_entry_granular(frame, this, fd, i, _gf_false);
+ else
+ ret = afr_selfheal_entry_do_subvol(frame, this, fd, i);
+ if (ret == -1) {
+ /* gfid or type mismatch. */
+ mismatch = _gf_true;
+ ret = 0;
+ }
+ if (ret)
+ break;
+ }
+
+ if (!ret && source != -1) {
+ /* Impunge */
+ if (local->need_full_crawl)
+ ret = afr_selfheal_entry_do_subvol(frame, this, fd, source);
+ else
+ ret = afr_selfheal_entry_granular(frame, this, fd, source,
+ _gf_true);
+ }
+
+ if (mismatch == _gf_true)
+ /* undo pending will be skipped */
+ ret = -1;
+ return ret;
+}
static int
-__afr_selfheal_entry (call_frame_t *frame, xlator_t *this, fd_t *fd,
- unsigned char *locked_on)
+__afr_selfheal_entry(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ unsigned char *locked_on)
{
- int ret = -1;
- int source = -1;
- unsigned char *sources = NULL;
- unsigned char *sinks = NULL;
- unsigned char *data_lock = NULL;
- unsigned char *postop_lock = NULL;
- unsigned char *healed_sinks = NULL;
- struct afr_reply *locked_replies = NULL;
- afr_private_t *priv = NULL;
- gf_boolean_t did_sh = _gf_true;
-
- priv = this->private;
-
- sources = alloca0 (priv->child_count);
- sinks = alloca0 (priv->child_count);
- healed_sinks = alloca0 (priv->child_count);
- data_lock = alloca0 (priv->child_count);
- postop_lock = alloca0 (priv->child_count);
-
- locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count);
-
- ret = afr_selfheal_entrylk (frame, this, fd->inode, this->name, NULL,
- data_lock);
- {
- if (ret < AFR_SH_MIN_PARTICIPANTS) {
- gf_log (this->name, GF_LOG_DEBUG, "%s: Skipping "
- "entry self-heal as only %d sub-volumes could "
- "be locked in %s domain",
- uuid_utoa (fd->inode->gfid), ret,
- this->name);
- ret = -ENOTCONN;
- goto unlock;
- }
-
- ret = __afr_selfheal_entry_prepare (frame, this, fd->inode,
- data_lock, sources, sinks,
- healed_sinks,
- locked_replies, &source);
- if (AFR_COUNT(healed_sinks, priv->child_count) == 0) {
- did_sh = _gf_false;
- goto unlock;
- }
- }
-unlock:
- afr_selfheal_unentrylk (frame, this, fd->inode, this->name, NULL,
- data_lock);
- if (ret < 0)
- goto out;
+ int ret = -1;
+ int source = -1;
+ unsigned char *sources = NULL;
+ unsigned char *sinks = NULL;
+ unsigned char *data_lock = NULL;
+ unsigned char *postop_lock = NULL;
+ unsigned char *healed_sinks = NULL;
+ unsigned char *undid_pending = NULL;
+ struct afr_reply *locked_replies = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ gf_boolean_t did_sh = _gf_true;
+
+ priv = this->private;
+ local = frame->local;
+
+ sources = alloca0(priv->child_count);
+ sinks = alloca0(priv->child_count);
+ healed_sinks = alloca0(priv->child_count);
+ undid_pending = alloca0(priv->child_count);
+ data_lock = alloca0(priv->child_count);
+ postop_lock = alloca0(priv->child_count);
+
+ locked_replies = alloca0(sizeof(*locked_replies) * priv->child_count);
+
+ ret = afr_selfheal_entrylk(frame, this, fd->inode, this->name, NULL,
+ data_lock);
+ {
+ if (ret < priv->child_count) {
+ gf_msg_debug(this->name, 0,
+ "%s: Skipping "
+ "entry self-heal as only %d sub-volumes could "
+ "be locked in %s domain",
+ uuid_utoa(fd->inode->gfid), ret, this->name);
+ ret = -ENOTCONN;
+ goto unlock;
+ }
- if (!did_sh)
- goto out;
+ ret = __afr_selfheal_entry_prepare(frame, this, fd->inode, data_lock,
+ sources, sinks, healed_sinks,
+ locked_replies, &source, NULL);
+ if (AFR_COUNT(healed_sinks, priv->child_count) == 0) {
+ did_sh = _gf_false;
+ goto unlock;
+ }
- ret = afr_selfheal_entry_do (frame, this, fd, source, sources,
- healed_sinks);
- if (ret)
- goto out;
-
- /* Take entrylks in xlator domain before doing post-op (undo-pending) in
- * entry self-heal. This is to prevent a parallel name self-heal on
- * an entry under @fd->inode from reading pending xattrs while it is
- * being modified by SHD after entry sh below, given that
- * name self-heal takes locks ONLY in xlator domain and is free to read
- * pending changelog in the absence of the following locking.
- */
- ret = afr_selfheal_entrylk (frame, this, fd->inode, this->name, NULL,
- postop_lock);
- {
- if (AFR_CMP (data_lock, postop_lock, priv->child_count) != 0) {
- gf_log (this->name, GF_LOG_DEBUG, "%s: Skipping "
- "post-op after entry self-heal as %d "
- "sub-volumes, as opposed to %d, could be locked"
- " in %s domain", uuid_utoa (fd->inode->gfid),
- ret, AFR_COUNT (data_lock, priv->child_count),
- this->name);
- ret = -ENOTCONN;
- goto postop_unlock;
- }
-
- ret = afr_selfheal_undo_pending (frame, this, fd->inode,
- sources, sinks, healed_sinks,
- AFR_ENTRY_TRANSACTION,
- locked_replies, postop_lock);
+ local->need_full_crawl = afr_need_full_heal(
+ this, locked_replies, source, healed_sinks, AFR_ENTRY_TRANSACTION);
+ }
+unlock:
+ afr_selfheal_unentrylk(frame, this, fd->inode, this->name, NULL, data_lock,
+ NULL);
+ if (ret < 0)
+ goto out;
+
+ if (!did_sh)
+ goto out;
+
+ ret = afr_selfheal_entry_do(frame, this, fd, source, sources, healed_sinks);
+ if (ret)
+ goto out;
+
+ /* Take entrylks in xlator domain before doing post-op (undo-pending) in
+ * entry self-heal. This is to prevent a parallel name self-heal on
+ * an entry under @fd->inode from reading pending xattrs while it is
+ * being modified by SHD after entry sh below, given that
+ * name self-heal takes locks ONLY in xlator domain and is free to read
+ * pending changelog in the absence of the following locking.
+ */
+ ret = afr_selfheal_entrylk(frame, this, fd->inode, this->name, NULL,
+ postop_lock);
+ {
+ if (AFR_CMP(data_lock, postop_lock, priv->child_count) != 0) {
+ gf_msg_debug(this->name, 0,
+ "%s: Skipping "
+ "post-op after entry self-heal as %d "
+ "sub-volumes, as opposed to %d, "
+ "could be locked in %s domain",
+ uuid_utoa(fd->inode->gfid), ret,
+ AFR_COUNT(data_lock, priv->child_count), this->name);
+ ret = -ENOTCONN;
+ goto postop_unlock;
}
+
+ afr_selfheal_restore_time(frame, this, fd->inode, source, healed_sinks,
+ locked_replies);
+ ret = afr_selfheal_undo_pending(
+ frame, this, fd->inode, sources, sinks, healed_sinks, undid_pending,
+ AFR_ENTRY_TRANSACTION, locked_replies, postop_lock);
+ }
postop_unlock:
- afr_selfheal_unentrylk (frame, this, fd->inode, this->name, NULL,
- postop_lock);
+ afr_selfheal_unentrylk(frame, this, fd->inode, this->name, NULL,
+ postop_lock, NULL);
out:
- if (did_sh)
- afr_log_selfheal (fd->inode->gfid, this, ret, "entry", source,
- healed_sinks);
- else
- ret = 1;
-
- if (locked_replies)
- afr_replies_wipe (locked_replies, priv->child_count);
- return ret;
+ if (did_sh)
+ afr_log_selfheal(fd->inode->gfid, this, ret, "entry", source, sources,
+ healed_sinks);
+ else
+ ret = 1;
+
+ if (locked_replies)
+ afr_replies_wipe(locked_replies, priv->child_count);
+ return ret;
}
-
static fd_t *
-afr_selfheal_data_opendir (xlator_t *this, inode_t *inode)
+afr_selfheal_data_opendir(xlator_t *this, inode_t *inode)
{
- loc_t loc = {0,};
- int ret = 0;
- fd_t *fd = NULL;
-
- fd = fd_create (inode, 0);
- if (!fd)
- return NULL;
-
- loc.inode = inode_ref (inode);
- gf_uuid_copy (loc.gfid, inode->gfid);
-
- ret = syncop_opendir (this, &loc, fd, NULL, NULL);
- if (ret) {
- fd_unref (fd);
- fd = NULL;
- } else {
- fd_bind (fd);
- }
-
- loc_wipe (&loc);
- return fd;
+ loc_t loc = {
+ 0,
+ };
+ int ret = 0;
+ fd_t *fd = NULL;
+
+ fd = fd_create(inode, 0);
+ if (!fd)
+ return NULL;
+
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+
+ ret = syncop_opendir(this, &loc, fd, NULL, NULL);
+ if (ret) {
+ fd_unref(fd);
+ fd = NULL;
+ } else {
+ fd_bind(fd);
+ }
+
+ loc_wipe(&loc);
+ return fd;
}
-
int
-afr_selfheal_entry (call_frame_t *frame, xlator_t *this, inode_t *inode)
+afr_selfheal_entry(call_frame_t *frame, xlator_t *this, inode_t *inode)
{
- afr_private_t *priv = NULL;
- unsigned char *locked_on = NULL;
- unsigned char *long_name_locked = NULL;
- fd_t *fd = NULL;
- int ret = 0;
-
- priv = this->private;
-
- fd = afr_selfheal_data_opendir (this, inode);
- if (!fd)
- return -EIO;
-
- locked_on = alloca0 (priv->child_count);
- long_name_locked = alloca0 (priv->child_count);
-
- ret = afr_selfheal_tryentrylk (frame, this, inode, priv->sh_domain, NULL,
- locked_on);
- {
- if (ret < AFR_SH_MIN_PARTICIPANTS) {
- gf_log (this->name, GF_LOG_DEBUG, "%s: Skipping "
- "entry self-heal as only %d sub-volumes could "
- "be locked in %s domain",
- uuid_utoa (fd->inode->gfid), ret,
- priv->sh_domain);
- /* Either less than two subvols available, or another
- selfheal (from another server) is in progress. Skip
- for now in any case there isn't anything to do.
- */
- ret = -ENOTCONN;
- goto unlock;
- }
-
- ret = afr_selfheal_tryentrylk (frame, this, inode, this->name,
- LONG_FILENAME, long_name_locked);
- {
- if (ret < 1) {
- gf_log (this->name, GF_LOG_DEBUG, "%s: Skipping"
- " entry self-heal as only %d "
- "sub-volumes could be locked in "
- "special-filename domain",
- uuid_utoa (fd->inode->gfid),
- ret);
- ret = -ENOTCONN;
- goto unlock;
- }
- ret = __afr_selfheal_entry (frame, this, fd, locked_on);
- }
- afr_selfheal_unentrylk (frame, this, inode, this->name,
- LONG_FILENAME, long_name_locked);
- }
+ afr_private_t *priv = NULL;
+ unsigned char *locked_on = NULL;
+ fd_t *fd = NULL;
+ int ret = 0;
+
+ priv = this->private;
+
+ fd = afr_selfheal_data_opendir(this, inode);
+ if (!fd)
+ return -EIO;
+
+ locked_on = alloca0(priv->child_count);
+
+ ret = afr_selfheal_tie_breaker_entrylk(frame, this, inode, priv->sh_domain,
+ NULL, locked_on);
+ {
+ if (ret < priv->child_count) {
+ gf_msg_debug(this->name, 0,
+ "%s: Skipping "
+ "entry self-heal as only %d sub-volumes could "
+ "be locked in %s domain",
+ uuid_utoa(fd->inode->gfid), ret, priv->sh_domain);
+ /* Either less than two subvols available, or another
+ selfheal (from another server) is in progress. Skip
+ for now in any case there isn't anything to do.
+ */
+ ret = -ENOTCONN;
+ goto unlock;
+ }
+
+ ret = __afr_selfheal_entry(frame, this, fd, locked_on);
+ }
unlock:
- afr_selfheal_unentrylk (frame, this, inode, priv->sh_domain, NULL, locked_on);
+ afr_selfheal_unentrylk(frame, this, inode, priv->sh_domain, NULL, locked_on,
+ NULL);
- if (fd)
- fd_unref (fd);
+ if (fd)
+ fd_unref(fd);
- return ret;
+ return ret;
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c
index 11c0c9c8cd9..03f43bad16e 100644
--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c
+++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c
@@ -8,110 +8,108 @@
cases as published by the Free Software Foundation.
*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "afr.h"
#include "afr-self-heal.h"
-#include "byte-order.h"
+#include <glusterfs/byte-order.h>
#include "protocol-common.h"
+#include <glusterfs/events.h>
-#define AFR_HEAL_ATTR (GF_SET_ATTR_UID|GF_SET_ATTR_GID|GF_SET_ATTR_MODE)
+#define AFR_HEAL_ATTR (GF_SET_ATTR_UID | GF_SET_ATTR_GID | GF_SET_ATTR_MODE)
static gf_boolean_t
-_afr_ignorable_key_match (dict_t *d, char *k, data_t *val, void *mdata)
+_afr_ignorable_key_match(dict_t *d, char *k, data_t *val, void *mdata)
{
- return afr_is_xattr_ignorable (k);
+ return afr_is_xattr_ignorable(k);
}
void
-afr_delete_ignorable_xattrs (dict_t *xattr)
+afr_delete_ignorable_xattrs(dict_t *xattr)
{
- dict_foreach_match (xattr, _afr_ignorable_key_match, NULL,
- dict_remove_foreach_fn, NULL);
+ dict_foreach_match(xattr, _afr_ignorable_key_match, NULL,
+ dict_remove_foreach_fn, NULL);
}
int
-__afr_selfheal_metadata_do (call_frame_t *frame, xlator_t *this, inode_t *inode,
- int source, unsigned char *healed_sinks,
- struct afr_reply *locked_replies)
+__afr_selfheal_metadata_do(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ int source, unsigned char *healed_sinks,
+ struct afr_reply *locked_replies)
{
- int ret = -1;
- loc_t loc = {0,};
- dict_t *xattr = NULL;
- dict_t *old_xattr = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
-
- priv = this->private;
-
- loc.inode = inode_ref (inode);
- gf_uuid_copy (loc.gfid, inode->gfid);
-
- gf_log (this->name, GF_LOG_INFO, "performing metadata selfheal on %s",
- uuid_utoa (inode->gfid));
-
- ret = syncop_getxattr (priv->children[source], &loc, &xattr, NULL,
- NULL, NULL);
- if (ret < 0) {
- ret = -EIO;
- goto out;
- }
-
- afr_delete_ignorable_xattrs (xattr);
-
- for (i = 0; i < priv->child_count; i++) {
- if (old_xattr) {
- dict_unref (old_xattr);
- old_xattr = NULL;
- }
-
- if (!healed_sinks[i])
- continue;
-
- ret = syncop_setattr (priv->children[i], &loc,
- &locked_replies[source].poststat,
- AFR_HEAL_ATTR, NULL, NULL, NULL, NULL);
- if (ret)
- healed_sinks[i] = 0;
-
- ret = syncop_getxattr (priv->children[i], &loc, &old_xattr, 0,
- NULL, NULL);
- if (old_xattr) {
- afr_delete_ignorable_xattrs (old_xattr);
- ret = syncop_removexattr (priv->children[i], &loc, "",
- old_xattr, NULL);
- }
-
- ret = syncop_setxattr (priv->children[i], &loc, xattr, 0, NULL,
- NULL);
- if (ret)
- healed_sinks[i] = 0;
- }
- ret = 0;
+ int ret = -1;
+ loc_t loc = {
+ 0,
+ };
+ dict_t *xattr = NULL;
+ dict_t *old_xattr = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+
+ priv = this->private;
+
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+
+ gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_SELF_HEAL_INFO,
+ "performing metadata selfheal on %s", uuid_utoa(inode->gfid));
+
+ ret = syncop_getxattr(priv->children[source], &loc, &xattr, NULL, NULL,
+ NULL);
+ if (ret < 0) {
+ ret = -EIO;
+ goto out;
+ }
+
+ afr_delete_ignorable_xattrs(xattr);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (old_xattr) {
+ dict_unref(old_xattr);
+ old_xattr = NULL;
+ }
+
+ if (!healed_sinks[i])
+ continue;
+
+ ret = syncop_setattr(priv->children[i], &loc,
+ &locked_replies[source].poststat, AFR_HEAL_ATTR,
+ NULL, NULL, NULL, NULL);
+ if (ret)
+ healed_sinks[i] = 0;
+
+ ret = syncop_getxattr(priv->children[i], &loc, &old_xattr, 0, NULL,
+ NULL);
+ if (old_xattr) {
+ afr_delete_ignorable_xattrs(old_xattr);
+ ret = syncop_removexattr(priv->children[i], &loc, "", old_xattr,
+ NULL);
+ if (ret)
+ healed_sinks[i] = 0;
+ }
+
+ ret = syncop_setxattr(priv->children[i], &loc, xattr, 0, NULL, NULL);
+ if (ret)
+ healed_sinks[i] = 0;
+ }
+ ret = 0;
out:
- loc_wipe (&loc);
- if (xattr)
- dict_unref (xattr);
- if (old_xattr)
- dict_unref (old_xattr);
+ loc_wipe(&loc);
+ if (xattr)
+ dict_unref(xattr);
+ if (old_xattr)
+ dict_unref(old_xattr);
- return ret;
+ return ret;
}
-static inline uint64_t
+static uint64_t
mtime_ns(struct iatt *ia)
{
- uint64_t ret;
+ uint64_t ret;
- ret = (((uint64_t)(ia->ia_mtime)) * 1000000000)
- + (uint64_t)(ia->ia_mtime_nsec);
+ ret = (((uint64_t)(ia->ia_mtime)) * 1000000000) +
+ (uint64_t)(ia->ia_mtime_nsec);
- return ret;
+ return ret;
}
/*
@@ -124,315 +122,425 @@ mtime_ns(struct iatt *ia)
* the source with the most recent modification date.
*/
static int
-afr_dirtime_splitbrain_source (call_frame_t *frame, xlator_t *this,
- struct afr_reply *replies,
- unsigned char *locked_on)
+afr_dirtime_splitbrain_source(call_frame_t *frame, xlator_t *this,
+ struct afr_reply *replies,
+ unsigned char *locked_on)
{
- afr_private_t *priv = NULL;
- int source = -1;
- struct iatt source_ia;
- struct iatt child_ia;
- uint64_t mtime = 0;
- int i;
- int ret = -1;
-
- priv = this->private;
+ afr_private_t *priv = NULL;
+ int source = -1;
+ struct iatt source_ia;
+ struct iatt child_ia;
+ uint64_t mtime = 0;
+ int i;
+ int ret = -1;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!locked_on[i])
+ continue;
+
+ if (!replies[i].valid)
+ continue;
+
+ if (replies[i].op_ret != 0)
+ continue;
+
+ if (mtime_ns(&replies[i].poststat) <= mtime)
+ continue;
+
+ mtime = mtime_ns(&replies[i].poststat);
+ source = i;
+ }
+
+ if (source == -1)
+ goto out;
+
+ source_ia = replies[source].poststat;
+ if (source_ia.ia_type != IA_IFDIR)
+ goto out;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (i == source)
+ continue;
+
+ if (!replies[i].valid)
+ continue;
+
+ if (replies[i].op_ret != 0)
+ continue;
+
+ child_ia = replies[i].poststat;
+
+ if (!IA_EQUAL(source_ia, child_ia, gfid) ||
+ !IA_EQUAL(source_ia, child_ia, type) ||
+ !IA_EQUAL(source_ia, child_ia, prot) ||
+ !IA_EQUAL(source_ia, child_ia, uid) ||
+ !IA_EQUAL(source_ia, child_ia, gid) ||
+ !afr_xattrs_are_equal(replies[source].xdata, replies[i].xdata))
+ goto out;
+ }
+
+ /*
+ * Metadata split brain is just about [amc]time
+ * We return our source.
+ */
+ ret = source;
+out:
+ return ret;
+}
- for (i = 0; i < priv->child_count; i++) {
- if (!locked_on[i])
- continue;
+static int
+__afr_selfheal_metadata_mark_pending_xattrs(call_frame_t *frame, xlator_t *this,
+ inode_t *inode,
+ struct afr_reply *replies,
+ unsigned char *sources)
+{
+ int ret = 0;
+ int i = 0;
+ int m_idx = 0;
+ afr_private_t *priv = NULL;
+ int raw[AFR_NUM_CHANGE_LOGS] = {0};
+ dict_t *xattr = NULL;
+
+ priv = this->private;
+ m_idx = afr_index_for_transaction_type(AFR_METADATA_TRANSACTION);
+ raw[m_idx] = 1;
+
+ xattr = dict_new();
+ if (!xattr)
+ return -ENOMEM;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (sources[i])
+ continue;
+ ret = dict_set_static_bin(xattr, priv->pending_key[i], raw,
+ sizeof(int) * AFR_NUM_CHANGE_LOGS);
+ if (ret) {
+ ret = -1;
+ goto out;
+ }
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i])
+ continue;
+ ret = afr_selfheal_post_op(frame, this, inode, i, xattr, NULL);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_INFO, -ret, AFR_MSG_SELF_HEAL_INFO,
+ "Failed to set pending metadata xattr on child %d for %s", i,
+ uuid_utoa(inode->gfid));
+ goto out;
+ }
+ }
- if (!replies[i].valid)
- continue;
+ afr_replies_wipe(replies, priv->child_count);
+ ret = afr_selfheal_unlocked_discover(frame, inode, inode->gfid, replies);
- if (replies[i].op_ret != 0)
- continue;
+out:
+ if (xattr)
+ dict_unref(xattr);
+ return ret;
+}
- if (mtime_ns(&replies[i].poststat) <= mtime)
- continue;
+/*
+ * Look for mismatching uid/gid or mode or user xattrs even if
+ * AFR xattrs don't say so, and pick one arbitrarily as winner. */
- mtime = mtime_ns(&replies[i].poststat);
- source = i;
+static int
+__afr_selfheal_metadata_finalize_source(call_frame_t *frame, xlator_t *this,
+ inode_t *inode, unsigned char *sources,
+ unsigned char *sinks,
+ unsigned char *healed_sinks,
+ unsigned char *undid_pending,
+ unsigned char *locked_on,
+ struct afr_reply *replies)
+{
+ int i = 0;
+ afr_private_t *priv = NULL;
+ struct iatt srcstat = {
+ 0,
+ };
+ int source = -1;
+ int sources_count = 0;
+ int ret = 0;
+
+ priv = this->private;
+
+ sources_count = AFR_COUNT(sources, priv->child_count);
+
+ if ((AFR_CMP(locked_on, healed_sinks, priv->child_count) == 0) ||
+ !sources_count) {
+ source = afr_mark_split_brain_source_sinks(
+ frame, this, inode, sources, sinks, healed_sinks, locked_on,
+ replies, AFR_METADATA_TRANSACTION);
+ if (source >= 0) {
+ _afr_fav_child_reset_sink_xattrs(
+ frame, this, inode, source, healed_sinks, undid_pending,
+ AFR_METADATA_TRANSACTION, locked_on, replies);
+ goto out;
}
- if (source == -1)
- goto out;
+ /* If this is a directory mtime/ctime only split brain
+ use the most recent */
+ source = afr_dirtime_splitbrain_source(frame, this, replies, locked_on);
+ if (source != -1) {
+ gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_SPLIT_BRAIN,
+ "clear time "
+ "split brain on %s",
+ uuid_utoa(replies[source].poststat.ia_gfid));
+ sources[source] = 1;
+ healed_sinks[source] = 0;
+ goto out;
+ }
- source_ia = replies[source].poststat;
- if (source_ia.ia_type != IA_IFDIR)
- goto out;
+ if (!priv->metadata_splitbrain_forced_heal) {
+ gf_event(EVENT_AFR_SPLIT_BRAIN,
+ "client-pid=%d;"
+ "subvol=%s;"
+ "type=metadata;file=%s",
+ this->ctx->cmd_args.client_pid, this->name,
+ uuid_utoa(inode->gfid));
+ return -EIO;
+ }
+ /* Metadata split brain, select one subvol
+ arbitrarily */
for (i = 0; i < priv->child_count; i++) {
- if (i == source)
- continue;
-
- if (!replies[i].valid)
- continue;
-
- if (replies[i].op_ret != 0)
- continue;
+ if (locked_on[i] && healed_sinks[i]) {
+ sources[i] = 1;
+ healed_sinks[i] = 0;
+ break;
+ }
+ }
+ }
+
+ /* No split brain at this point. If we were called from
+ * afr_heal_splitbrain_file(), abort.*/
+ if (afr_dict_contains_heal_op(frame))
+ return -EIO;
+
+ source = afr_choose_source_by_policy(priv, sources,
+ AFR_METADATA_TRANSACTION);
+ srcstat = replies[source].poststat;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i] || i == source)
+ continue;
+ if (!IA_EQUAL(srcstat, replies[i].poststat, type) ||
+ !IA_EQUAL(srcstat, replies[i].poststat, uid) ||
+ !IA_EQUAL(srcstat, replies[i].poststat, gid) ||
+ !IA_EQUAL(srcstat, replies[i].poststat, prot)) {
+ gf_msg_debug(this->name, 0,
+ "%s: iatt mismatch "
+ "for source(%d) vs (%d)",
+ uuid_utoa(replies[source].poststat.ia_gfid), source,
+ i);
+ sources[i] = 0;
+ healed_sinks[i] = 1;
+ }
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i] || i == source)
+ continue;
+ if (!afr_xattrs_are_equal(replies[source].xdata, replies[i].xdata)) {
+ gf_msg_debug(this->name, 0,
+ "%s: xattr mismatch "
+ "for source(%d) vs (%d)",
+ uuid_utoa(replies[source].poststat.ia_gfid), source,
+ i);
+ sources[i] = 0;
+ healed_sinks[i] = 1;
+ }
+ }
+ if ((sources_count == priv->child_count) && (source > -1) &&
+ (AFR_COUNT(healed_sinks, priv->child_count) != 0)) {
+ ret = __afr_selfheal_metadata_mark_pending_xattrs(frame, this, inode,
+ replies, sources);
+ if (ret < 0)
+ return ret;
+ }
+out:
+ afr_mark_active_sinks(this, sources, locked_on, healed_sinks);
+ return source;
+}
- child_ia = replies[i].poststat;
+int
+__afr_selfheal_metadata_prepare(call_frame_t *frame, xlator_t *this,
+ inode_t *inode, unsigned char *locked_on,
+ unsigned char *sources, unsigned char *sinks,
+ unsigned char *healed_sinks,
+ unsigned char *undid_pending,
+ struct afr_reply *replies, unsigned char *pflag)
+{
+ int ret = -1;
+ int source = -1;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ uint64_t *witness = NULL;
- if (!IA_EQUAL(source_ia, child_ia, gfid) ||
- !IA_EQUAL(source_ia, child_ia, type) ||
- !IA_EQUAL(source_ia, child_ia, prot) ||
- !IA_EQUAL(source_ia, child_ia, uid) ||
- !IA_EQUAL(source_ia, child_ia, gid) ||
- !afr_xattrs_are_equal (replies[source].xdata,
- replies[i].xdata))
- goto out;
- }
+ priv = this->private;
- /*
- * Metadata split brain is just about [amc]time
- * We return our source.
- */
- ret = source;
-out:
+ ret = afr_selfheal_unlocked_discover(frame, inode, inode->gfid, replies);
+ if (ret)
return ret;
-}
+ witness = alloca0(sizeof(*witness) * priv->child_count);
+ ret = afr_selfheal_find_direction(frame, this, replies,
+ AFR_METADATA_TRANSACTION, locked_on,
+ sources, sinks, witness, pflag);
+ if (ret)
+ return ret;
-/*
- * Look for mismatching uid/gid or mode or user xattrs even if
- * AFR xattrs don't say so, and pick one arbitrarily as winner. */
+ /* Initialize the healed_sinks[] array optimistically to
+ the intersection of to-be-healed (i.e sinks[]) and
+ the list of servers which are up (i.e locked_on[]).
+
+ As we encounter failures in the healing process, we
+ will unmark the respective servers in the healed_sinks[]
+ array.
+ */
+ AFR_INTERSECT(healed_sinks, sinks, locked_on, priv->child_count);
+
+ /* If any source has witness, pick first
+ * witness source and make everybody else sinks */
+ for (i = 0; i < priv->child_count; i++) {
+ if (sources[i] && witness[i]) {
+ source = i;
+ break;
+ }
+ }
-static int
-__afr_selfheal_metadata_finalize_source (call_frame_t *frame, xlator_t *this,
- unsigned char *sources,
- unsigned char *sinks,
- unsigned char *healed_sinks,
- unsigned char *locked_on,
- struct afr_reply *replies)
-{
- int i = 0;
- afr_private_t *priv = NULL;
- struct iatt first = {0, };
- int source = -1;
- int sources_count = 0;
-
- priv = this->private;
-
- sources_count = AFR_COUNT (sources, priv->child_count);
-
- if ((AFR_CMP (locked_on, healed_sinks, priv->child_count) == 0)
- || !sources_count) {
-
- source = afr_mark_split_brain_source_sinks (frame, this,
- sources, sinks,
- healed_sinks,
- locked_on, replies,
- AFR_METADATA_TRANSACTION);
- if (source >= 0)
- return source;
-
- /* If this is a directory mtime/ctime only split brain
- use the most recent */
- source = afr_dirtime_splitbrain_source (frame, this,
- replies, locked_on);
- if (source != -1) {
- gf_log (this->name, GF_LOG_NOTICE, "clear time "
- "split brain on %s",
- uuid_utoa (replies[source].poststat.ia_gfid));
- sources[source] = 1;
- healed_sinks[source] = 0;
- return source;
- }
-
- if (!priv->metadata_splitbrain_forced_heal) {
- return -EIO;
- }
-
- /* Metadata split brain, select one subvol
- arbitrarily */
- for (i = 0; i < priv->child_count; i++) {
- if (locked_on[i] && healed_sinks[i]) {
- sources[i] = 1;
- healed_sinks[i] = 0;
- break;
- }
- }
- }
-
- /* No split brain at this point. If we were called from
- * afr_heal_splitbrain_file(), abort.*/
- if (afr_dict_contains_heal_op(frame))
- return -EIO;
-
- for (i = 0; i < priv->child_count; i++) {
- if (!sources[i])
- continue;
- if (source == -1) {
- source = i;
- first = replies[i].poststat;
- break;
- }
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (!sources[i] || i == source)
- continue;
- if (!IA_EQUAL (first, replies[i].poststat, type) ||
- !IA_EQUAL (first, replies[i].poststat, uid) ||
- !IA_EQUAL (first, replies[i].poststat, gid) ||
- !IA_EQUAL (first, replies[i].poststat, prot)) {
- gf_log (this->name, GF_LOG_DEBUG, "%s: iatt mismatch "
- "for source(%d) vs (%d)",
- uuid_utoa (replies[source].poststat.ia_gfid),
- source, i);
- sources[i] = 0;
- healed_sinks[i] = 1;
- }
- }
-
- for (i =0; i < priv->child_count; i++) {
- if (!sources[i] || i == source)
- continue;
- if (!afr_xattrs_are_equal (replies[source].xdata,
- replies[i].xdata)) {
- gf_log (this->name, GF_LOG_DEBUG, "%s: xattr mismatch "
- "for source(%d) vs (%d)",
- uuid_utoa (replies[source].poststat.ia_gfid),
- source, i);
- sources[i] = 0;
- healed_sinks[i] = 1;
- }
+ if (source != -1) {
+ for (i = 0; i < priv->child_count; i++) {
+ if (i != source && sources[i]) {
+ sources[i] = 0;
+ healed_sinks[i] = 1;
+ }
}
+ }
- return source;
-}
+ source = __afr_selfheal_metadata_finalize_source(
+ frame, this, inode, sources, sinks, healed_sinks, undid_pending,
+ locked_on, replies);
+ if (source < 0)
+ return -EIO;
+
+ return source;
+}
int
-__afr_selfheal_metadata_prepare (call_frame_t *frame, xlator_t *this, inode_t *inode,
- unsigned char *locked_on, unsigned char *sources,
- unsigned char *sinks, unsigned char *healed_sinks,
- struct afr_reply *replies)
+afr_selfheal_metadata(call_frame_t *frame, xlator_t *this, inode_t *inode)
{
- int ret = -1;
- int source = -1;
- afr_private_t *priv = NULL;
- int i = 0;
- uint64_t *witness = NULL;
+ afr_private_t *priv = NULL;
+ int ret = -1;
+ unsigned char *sources = NULL;
+ unsigned char *sinks = NULL;
+ unsigned char *data_lock = NULL;
+ unsigned char *healed_sinks = NULL;
+ unsigned char *undid_pending = NULL;
+ struct afr_reply *locked_replies = NULL;
+ gf_boolean_t did_sh = _gf_true;
+ int source = -1;
+
+ priv = this->private;
+
+ sources = alloca0(priv->child_count);
+ sinks = alloca0(priv->child_count);
+ healed_sinks = alloca0(priv->child_count);
+ undid_pending = alloca0(priv->child_count);
+ data_lock = alloca0(priv->child_count);
+
+ locked_replies = alloca0(sizeof(*locked_replies) * priv->child_count);
+
+ ret = afr_selfheal_inodelk(frame, this, inode, this->name, LLONG_MAX - 1, 0,
+ data_lock);
+ {
+ if (ret < priv->child_count) {
+ ret = -ENOTCONN;
+ goto unlock;
+ }
- priv = this->private;
+ ret = __afr_selfheal_metadata_prepare(
+ frame, this, inode, data_lock, sources, sinks, healed_sinks,
+ undid_pending, locked_replies, NULL);
+ if (ret < 0)
+ goto unlock;
- ret = afr_selfheal_unlocked_discover (frame, inode, inode->gfid,
- replies);
- if (ret)
- return ret;
-
- witness = alloca0 (sizeof (*witness) * priv->child_count);
- ret = afr_selfheal_find_direction (frame, this, replies,
- AFR_METADATA_TRANSACTION,
- locked_on, sources, sinks, witness);
- if (ret)
- return ret;
-
- /* Initialize the healed_sinks[] array optimistically to
- the intersection of to-be-healed (i.e sinks[]) and
- the list of servers which are up (i.e locked_on[]).
-
- As we encounter failures in the healing process, we
- will unmark the respective servers in the healed_sinks[]
- array.
- */
- AFR_INTERSECT (healed_sinks, sinks, locked_on, priv->child_count);
-
- /* If any source has witness, pick first
- * witness source and make everybody else sinks */
- for (i = 0; i < priv->child_count; i++) {
- if (sources[i] && witness[i]) {
- source = i;
- break;
- }
- }
+ source = ret;
- if (source != -1) {
- for (i = 0; i < priv->child_count; i++) {
- if (i != source && sources[i]) {
- sources[i] = 0;
- healed_sinks[i] = 1;
- }
- }
+ if (AFR_COUNT(healed_sinks, priv->child_count) == 0) {
+ did_sh = _gf_false;
+ goto unlock;
}
- source = __afr_selfheal_metadata_finalize_source (frame, this, sources,
- sinks, healed_sinks,
- locked_on, replies);
+ ret = __afr_selfheal_metadata_do(frame, this, inode, source,
+ healed_sinks, locked_replies);
+ if (ret)
+ goto unlock;
- if (source < 0)
- return -EIO;
+ afr_selfheal_restore_time(frame, this, inode, source, healed_sinks,
+ locked_replies);
- return source;
+ ret = afr_selfheal_undo_pending(
+ frame, this, inode, sources, sinks, healed_sinks, undid_pending,
+ AFR_METADATA_TRANSACTION, locked_replies, data_lock);
+ }
+unlock:
+ afr_selfheal_uninodelk(frame, this, inode, this->name, LLONG_MAX - 1, 0,
+ data_lock);
+
+ if (did_sh)
+ afr_log_selfheal(inode->gfid, this, ret, "metadata", source, sources,
+ healed_sinks);
+ else
+ ret = 1;
+
+ if (locked_replies)
+ afr_replies_wipe(locked_replies, priv->child_count);
+ return ret;
}
int
-afr_selfheal_metadata (call_frame_t *frame, xlator_t *this, inode_t *inode)
+afr_selfheal_metadata_by_stbuf(xlator_t *this, struct iatt *stbuf)
{
- afr_private_t *priv = NULL;
- int ret = -1;
- unsigned char *sources = NULL;
- unsigned char *sinks = NULL;
- unsigned char *data_lock = NULL;
- unsigned char *healed_sinks = NULL;
- struct afr_reply *locked_replies = NULL;
- gf_boolean_t did_sh = _gf_true;
- int source = -1;
-
- priv = this->private;
-
- sources = alloca0 (priv->child_count);
- sinks = alloca0 (priv->child_count);
- healed_sinks = alloca0 (priv->child_count);
- data_lock = alloca0 (priv->child_count);
-
- locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count);
-
- ret = afr_selfheal_inodelk (frame, this, inode, this->name,
- LLONG_MAX - 1, 0, data_lock);
- {
- if (ret < AFR_SH_MIN_PARTICIPANTS) {
- ret = -ENOTCONN;
- goto unlock;
- }
-
- ret = __afr_selfheal_metadata_prepare (frame, this, inode,
- data_lock, sources,
- sinks, healed_sinks,
- locked_replies);
- if (ret < 0)
- goto unlock;
-
- source = ret;
-
- if (AFR_COUNT (healed_sinks, priv->child_count) == 0) {
- did_sh = _gf_false;
- goto unlock;
- }
-
- ret = __afr_selfheal_metadata_do (frame, this, inode, source,
- healed_sinks, locked_replies);
- if (ret)
- goto unlock;
-
- ret = afr_selfheal_undo_pending (frame, this, inode, sources,
- sinks, healed_sinks,
- AFR_METADATA_TRANSACTION,
- locked_replies, data_lock);
- }
-unlock:
- afr_selfheal_uninodelk (frame, this, inode, this->name,
- LLONG_MAX -1, 0, data_lock);
-
- if (did_sh)
- afr_log_selfheal (inode->gfid, this, ret, "metadata", source,
- healed_sinks);
- else
- ret = 1;
-
- if (locked_replies)
- afr_replies_wipe (locked_replies, priv->child_count);
- return ret;
+ inode_t *inode = NULL;
+ inode_t *link_inode = NULL;
+ call_frame_t *frame = NULL;
+ int ret = 0;
+
+ if (gf_uuid_is_null(stbuf->ia_gfid)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ inode = inode_new(this->itable);
+ if (!inode) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ link_inode = inode_link(inode, NULL, NULL, stbuf);
+ if (!link_inode) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ frame = afr_frame_create(this, &ret);
+ if (!frame) {
+ ret = -ret;
+ goto out;
+ }
+
+ ret = afr_selfheal_metadata(frame, this, link_inode);
+out:
+ if (inode)
+ inode_unref(inode);
+ if (link_inode)
+ inode_unref(link_inode);
+ if (frame)
+ AFR_STACK_DESTROY(frame);
+ return ret;
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c
index ba9c3eeb6ea..834aac86d48 100644
--- a/xlators/cluster/afr/src/afr-self-heal-name.c
+++ b/xlators/cluster/afr/src/afr-self-heal-name.c
@@ -8,701 +8,609 @@
cases as published by the Free Software Foundation.
*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
+#include <glusterfs/events.h>
#include "afr.h"
#include "afr-self-heal.h"
+#include "afr-messages.h"
int
-__afr_selfheal_assign_gfid (xlator_t *this, inode_t *parent, uuid_t pargfid,
- const char *bname, inode_t *inode,
- struct afr_reply *replies, void *gfid,
- unsigned char *locked_on,
- gf_boolean_t is_gfid_absent)
+__afr_selfheal_assign_gfid(xlator_t *this, inode_t *parent, uuid_t pargfid,
+ const char *bname, inode_t *inode,
+ struct afr_reply *replies, void *gfid,
+ unsigned char *locked_on, int source,
+ unsigned char *sources, gf_boolean_t is_gfid_absent,
+ int *gfid_idx)
{
- int ret = 0;
- int up_count = 0;
- int locked_count = 0;
- afr_private_t *priv = NULL;
- dict_t *xdata = NULL;
- loc_t loc = {0, };
- call_frame_t *new_frame = NULL;
- afr_local_t *new_local = NULL;
-
- priv = this->private;
-
- new_frame = afr_frame_create (this);
- if (!new_frame) {
- ret = -ENOMEM;
- goto out;
- }
-
- new_local = new_frame->local;
+ int ret = 0;
+ int up_count = 0;
+ int locked_count = 0;
+ afr_private_t *priv = NULL;
- gf_uuid_copy (parent->gfid, pargfid);
+ priv = this->private;
- xdata = dict_new ();
- if (!xdata) {
- ret = -ENOMEM;
- goto out;
- }
+ gf_uuid_copy(parent->gfid, pargfid);
- ret = dict_set_static_bin (xdata, "gfid-req", gfid, 16);
- if (ret) {
- ret = -ENOMEM;
- goto out;
- }
-
- loc.parent = inode_ref (parent);
- loc.inode = inode_ref (inode);
- gf_uuid_copy (loc.pargfid, pargfid);
- loc.name = bname;
-
- if (is_gfid_absent) {
- /* Ensure all children of AFR are up before performing gfid heal, to
- * guard against the possibility of gfid split brain. */
-
- up_count = AFR_COUNT (priv->child_up, priv->child_count);
- if (up_count != priv->child_count) {
- ret = -EIO;
- goto out;
- }
+ if (is_gfid_absent) {
+ /* Ensure all children of AFR are up before performing gfid heal, to
+ * guard against the possibility of gfid split brain. */
- locked_count = AFR_COUNT (locked_on, priv->child_count);
- if (locked_count != priv->child_count) {
- ret = -EIO;
- goto out;
- }
+ up_count = AFR_COUNT(priv->child_up, priv->child_count);
+ if (up_count != priv->child_count) {
+ ret = -EIO;
+ goto out;
}
- /* Clear out old replies here and wind lookup on all locked
- * subvolumes to achieve two things:
- * a. gfid heal on those subvolumes that do not have gfid associated
- * with the inode, and
- * b. refresh replies, which can be consumed by
- * __afr_selfheal_name_impunge().
- */
-
- afr_replies_wipe (replies, priv->child_count);
-
- AFR_ONLIST (locked_on, new_frame, afr_selfheal_discover_cbk, lookup,
- &loc, xdata);
+ locked_count = AFR_COUNT(locked_on, priv->child_count);
+ if (locked_count != priv->child_count) {
+ ret = -EIO;
+ goto out;
+ }
+ }
- afr_replies_copy (replies, new_local->replies, priv->child_count);
+ ret = afr_lookup_and_heal_gfid(this, parent, bname, inode, replies, source,
+ sources, gfid, gfid_idx);
out:
- loc_wipe (&loc);
- if (xdata)
- dict_unref (xdata);
- if (new_frame)
- AFR_STACK_DESTROY (new_frame);
-
- return ret;
+ return ret;
}
int
-__afr_selfheal_name_impunge (call_frame_t *frame, xlator_t *this,
- inode_t *parent, uuid_t pargfid,
- const char *bname, inode_t *inode,
- struct afr_reply *replies, int gfid_idx)
+__afr_selfheal_name_impunge(call_frame_t *frame, xlator_t *this,
+ inode_t *parent, uuid_t pargfid, const char *bname,
+ inode_t *inode, struct afr_reply *replies,
+ int gfid_idx)
{
- int i = 0;
- afr_private_t *priv = NULL;
- int ret = 0;
- unsigned char *newentry = NULL;
- unsigned char *sources = NULL;
+ int i = 0;
+ afr_private_t *priv = NULL;
+ int ret = 0;
+ unsigned char *sources = NULL;
- priv = this->private;
+ priv = this->private;
- newentry = alloca0 (priv->child_count);
- sources = alloca0 (priv->child_count);
+ sources = alloca0(priv->child_count);
- gf_uuid_copy (parent->gfid, pargfid);
+ gf_uuid_copy(parent->gfid, pargfid);
- for (i = 0; i < priv->child_count; i++) {
- if (!replies[i].valid)
- continue;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret != 0)
+ continue;
- if (gf_uuid_compare (replies[i].poststat.ia_gfid,
- replies[gfid_idx].poststat.ia_gfid) == 0) {
- sources[i] = 1;
- continue;
- }
-
- ret |= afr_selfheal_recreate_entry (this, i, gfid_idx, parent,
- bname, inode, replies,
- newentry);
- }
-
- if (AFR_COUNT (newentry, priv->child_count))
- afr_selfheal_newentry_mark (frame, this, inode, gfid_idx, replies,
- sources, newentry);
- return ret;
-}
+ if (gf_uuid_compare(replies[i].poststat.ia_gfid,
+ replies[gfid_idx].poststat.ia_gfid) == 0) {
+ sources[i] = 1;
+ continue;
+ }
+ }
+ for (i = 0; i < priv->child_count; i++) {
+ if (sources[i])
+ continue;
-int
-__afr_selfheal_name_expunge (xlator_t *this, inode_t *parent, uuid_t pargfid,
- const char *bname, inode_t *inode,
- struct afr_reply *replies)
-{
- loc_t loc = {0, };
- int i = 0;
- afr_private_t *priv = NULL;
- char g[64];
- int ret = 0;
-
- priv = this->private;
-
- loc.parent = inode_ref (parent);
- gf_uuid_copy (loc.pargfid, pargfid);
- loc.name = bname;
- loc.inode = inode_ref (inode);
-
- for (i = 0; i < priv->child_count; i++) {
- if (!replies[i].valid)
- continue;
-
- if (replies[i].op_ret)
- continue;
-
- switch (replies[i].poststat.ia_type) {
- case IA_IFDIR:
- gf_log (this->name, GF_LOG_WARNING,
- "expunging dir %s/%s (%s) on %s",
- uuid_utoa (pargfid), bname,
- uuid_utoa_r (replies[i].poststat.ia_gfid, g),
- priv->children[i]->name);
- ret |= syncop_rmdir (priv->children[i], &loc, 1, NULL,
- NULL);
- break;
- default:
- gf_log (this->name, GF_LOG_WARNING,
- "expunging file %s/%s (%s) on %s",
- uuid_utoa (pargfid), bname,
- uuid_utoa_r (replies[i].poststat.ia_gfid, g),
- priv->children[i]->name);
- ret |= syncop_unlink (priv->children[i], &loc, NULL,
- NULL);
- break;
- }
- }
-
- loc_wipe (&loc);
-
- return ret;
+ ret |= afr_selfheal_recreate_entry(frame, i, gfid_idx, sources, parent,
+ bname, inode, replies);
+ }
+ return ret;
}
-/* This function is to be called after ensuring that there is no gfid mismatch
- * for the inode across multiple sources
- */
-static int
-afr_selfheal_gfid_idx_get (xlator_t *this, struct afr_reply *replies,
- unsigned char *sources)
+int
+__afr_selfheal_name_expunge(xlator_t *this, inode_t *parent, uuid_t pargfid,
+ const char *bname, inode_t *inode,
+ struct afr_reply *replies)
{
- int i = 0;
- int gfid_idx = -1;
- afr_private_t *priv = NULL;
+ int i = 0;
+ afr_private_t *priv = NULL;
+ int ret = 0;
- priv = this->private;
+ priv = this->private;
- for (i = 0; i < priv->child_count; i++) {
- if (!replies[i].valid)
- continue;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
- if (!sources[i])
- continue;
+ if (replies[i].op_ret)
+ continue;
- if (gf_uuid_is_null (replies[i].poststat.ia_gfid))
- continue;
+ ret |= afr_selfheal_entry_delete(this, parent, bname, inode, i,
+ replies);
+ }
- gfid_idx = i;
- break;
- }
- return gfid_idx;
+ return ret;
}
static gf_boolean_t
-afr_selfheal_name_need_heal_check (xlator_t *this, struct afr_reply *replies)
+afr_selfheal_name_need_heal_check(xlator_t *this, struct afr_reply *replies)
{
- int i = 0;
- int first_idx = -1;
- gf_boolean_t need_heal = _gf_false;
- afr_private_t *priv = NULL;
+ int i = 0;
+ int first_idx = -1;
+ gf_boolean_t need_heal = _gf_false;
+ afr_private_t *priv = NULL;
- priv = this->private;
+ priv = this->private;
- for (i = 0; i < priv->child_count; i++) {
- if (!replies[i].valid)
- continue;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
- if ((replies[i].op_ret == -1) &&
- (replies[i].op_errno == ENODATA))
- need_heal = _gf_true;
+ if ((replies[i].op_ret == -1) && (replies[i].op_errno == ENODATA))
+ need_heal = _gf_true;
- if (first_idx == -1) {
- first_idx = i;
- continue;
- }
-
- if (replies[i].op_ret != replies[first_idx].op_ret)
- need_heal = _gf_true;
+ if (first_idx == -1) {
+ first_idx = i;
+ continue;
+ }
- if (gf_uuid_compare (replies[i].poststat.ia_gfid,
- replies[first_idx].poststat.ia_gfid))
- need_heal = _gf_true;
+ if (replies[i].op_ret != replies[first_idx].op_ret)
+ need_heal = _gf_true;
- if ((replies[i].op_ret == 0) &&
- (gf_uuid_is_null(replies[i].poststat.ia_gfid)))
- need_heal = _gf_true;
+ if (gf_uuid_compare(replies[i].poststat.ia_gfid,
+ replies[first_idx].poststat.ia_gfid))
+ need_heal = _gf_true;
- }
+ if ((replies[i].op_ret == 0) &&
+ (gf_uuid_is_null(replies[i].poststat.ia_gfid)))
+ need_heal = _gf_true;
+ }
- return need_heal;
+ return need_heal;
}
static int
-afr_selfheal_name_type_mismatch_check (xlator_t *this, struct afr_reply *replies,
- int source, unsigned char *sources,
- uuid_t pargfid, const char *bname)
+afr_selfheal_name_type_mismatch_check(xlator_t *this, struct afr_reply *replies,
+ int source, unsigned char *sources,
+ uuid_t pargfid, const char *bname)
{
- int i = 0;
- int type_idx = -1;
- ia_type_t inode_type = IA_INVAL;
- afr_private_t *priv = NULL;
+ int i = 0;
+ int type_idx = -1;
+ ia_type_t inode_type = IA_INVAL;
+ ia_type_t inode_type1 = IA_INVAL;
+ afr_private_t *priv = NULL;
- priv = this->private;
+ priv = this->private;
- for (i = 0; i < priv->child_count; i++) {
- if (!replies[i].valid)
- continue;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret != 0)
+ continue;
- if (replies[i].poststat.ia_type == IA_INVAL)
- continue;
+ if (replies[i].poststat.ia_type == IA_INVAL)
+ continue;
- if (inode_type == IA_INVAL) {
- inode_type = replies[i].poststat.ia_type;
- type_idx = i;
- continue;
- }
-
- if (sources[i] || source == -1) {
- if ((sources[type_idx] || source == -1) &&
- (inode_type != replies[i].poststat.ia_type)) {
- gf_msg (this->name, GF_LOG_WARNING, 0,
- AFR_MSG_SPLIT_BRAIN,
- "Type mismatch for <gfid:%s>/%s: "
- "%d on %s and %d on %s",
- uuid_utoa(pargfid), bname,
- replies[i].poststat.ia_type,
- priv->children[i]->name,
- replies[type_idx].poststat.ia_type,
- priv->children[type_idx]->name);
- return -EIO;
- }
- inode_type = replies[i].poststat.ia_type;
- type_idx = i;
- }
+ if (inode_type == IA_INVAL) {
+ inode_type = replies[i].poststat.ia_type;
+ type_idx = i;
+ continue;
}
- return 0;
+ inode_type1 = replies[i].poststat.ia_type;
+ if (sources[i] || source == -1) {
+ if ((sources[type_idx] || source == -1) &&
+ (inode_type != inode_type1)) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_SPLIT_BRAIN,
+ "Type mismatch for <gfid:%s>/%s: "
+ "%s on %s and %s on %s",
+ uuid_utoa(pargfid), bname,
+ gf_inode_type_to_str(inode_type1),
+ priv->children[i]->name,
+ gf_inode_type_to_str(inode_type),
+ priv->children[type_idx]->name);
+ gf_event(EVENT_AFR_SPLIT_BRAIN,
+ "client-pid=%d;"
+ "subvol=%s;type=file;"
+ "file=<gfid:%s>/%s;count=2;"
+ "child-%d=%s;type-%d=%s;child-%d=%s;"
+ "type-%d=%s",
+ this->ctx->cmd_args.client_pid, this->name,
+ uuid_utoa(pargfid), bname, i, priv->children[i]->name,
+ i, gf_inode_type_to_str(inode_type1), type_idx,
+ priv->children[type_idx]->name, type_idx,
+ gf_inode_type_to_str(inode_type));
+ return -EIO;
+ }
+ inode_type = replies[i].poststat.ia_type;
+ type_idx = i;
+ }
+ }
+ return 0;
}
static int
-afr_selfheal_name_gfid_mismatch_check (xlator_t *this, struct afr_reply *replies,
- int source, unsigned char *sources,
- int *gfid_idx, uuid_t pargfid,
- const char *bname)
+afr_selfheal_name_gfid_mismatch_check(xlator_t *this, struct afr_reply *replies,
+ int source, unsigned char *sources,
+ int *gfid_idx, uuid_t pargfid,
+ const char *bname, inode_t *inode,
+ unsigned char *locked_on, dict_t *xdata)
{
- int i = 0;
- int gfid_idx_iter = -1;
- void *gfid = NULL;
- afr_private_t *priv = NULL;
- char g1[64], g2[64];
-
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++) {
- if (!replies[i].valid)
- continue;
-
- if (gf_uuid_is_null (replies[i].poststat.ia_gfid))
- continue;
-
- if (!gfid) {
- gfid = &replies[i].poststat.ia_gfid;
- gfid_idx_iter = i;
- continue;
- }
-
- if (sources[i] || source == -1) {
- if ((sources[gfid_idx_iter] || source == -1) &&
- gf_uuid_compare (gfid, replies[i].poststat.ia_gfid)) {
- gf_msg (this->name, GF_LOG_WARNING, 0,
- AFR_MSG_SPLIT_BRAIN,
- "GFID mismatch for <gfid:%s>/%s "
- "%s on %s and %s on %s",
- uuid_utoa (pargfid), bname,
- uuid_utoa_r (replies[i].poststat.ia_gfid, g1),
- priv->children[i]->name,
- uuid_utoa_r (replies[gfid_idx_iter].poststat.ia_gfid, g2),
- priv->children[gfid_idx_iter]->name);
- return -EIO;
- }
-
- gfid = &replies[i].poststat.ia_gfid;
- gfid_idx_iter = i;
- }
- }
-
- *gfid_idx = gfid_idx_iter;
- return 0;
+ int i = 0;
+ int gfid_idx_iter = -1;
+ int ret = -1;
+ void *gfid = NULL;
+ void *gfid1 = NULL;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret != 0)
+ continue;
+
+ if (gf_uuid_is_null(replies[i].poststat.ia_gfid))
+ continue;
+
+ if (!gfid) {
+ gfid = &replies[i].poststat.ia_gfid;
+ gfid_idx_iter = i;
+ continue;
+ }
+
+ gfid1 = &replies[i].poststat.ia_gfid;
+ if (sources[i] || source == -1) {
+ if ((sources[gfid_idx_iter] || source == -1) &&
+ gf_uuid_compare(gfid, gfid1)) {
+ ret = afr_gfid_split_brain_source(this, replies, inode, pargfid,
+ bname, gfid_idx_iter, i,
+ locked_on, gfid_idx, xdata);
+ if (!ret && *gfid_idx >= 0) {
+ ret = dict_set_sizen_str_sizen(xdata, "gfid-heal-msg",
+ "GFID split-brain resolved");
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ AFR_MSG_DICT_SET_FAILED,
+ "Error setting gfid-"
+ "heal-msg dict");
+ }
+ return ret;
+ }
+ gfid = &replies[i].poststat.ia_gfid;
+ gfid_idx_iter = i;
+ }
+ }
+
+ *gfid_idx = gfid_idx_iter;
+ return 0;
}
static gf_boolean_t
-afr_selfheal_name_source_empty_check (xlator_t *this, struct afr_reply *replies,
- unsigned char *sources, int source)
+afr_selfheal_name_source_empty_check(xlator_t *this, struct afr_reply *replies,
+ unsigned char *sources, int source)
{
- int i = 0;
- afr_private_t *priv = NULL;
- gf_boolean_t source_is_empty = _gf_true;
+ int i = 0;
+ afr_private_t *priv = NULL;
+ gf_boolean_t source_is_empty = _gf_true;
- priv = this->private;
+ priv = this->private;
- if (source == -1) {
- source_is_empty = _gf_false;
- goto out;
- }
+ if (source == -1) {
+ source_is_empty = _gf_false;
+ goto out;
+ }
- for (i = 0; i < priv->child_count; i++) {
- if (!sources[i])
- continue;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i])
+ continue;
- if (replies[i].op_ret == -1 && replies[i].op_errno == ENOENT)
- continue;
+ if (replies[i].op_ret == -1 && replies[i].op_errno == ENOENT)
+ continue;
- source_is_empty = _gf_false;
- break;
- }
+ source_is_empty = _gf_false;
+ break;
+ }
out:
- return source_is_empty;
+ return source_is_empty;
}
int
-__afr_selfheal_name_do (call_frame_t *frame, xlator_t *this, inode_t *parent,
- uuid_t pargfid, const char *bname, inode_t *inode,
- unsigned char *sources, unsigned char *sinks,
- unsigned char *healed_sinks, int source,
- unsigned char *locked_on, struct afr_reply *replies,
- void *gfid_req)
+__afr_selfheal_name_do(call_frame_t *frame, xlator_t *this, inode_t *parent,
+ uuid_t pargfid, const char *bname, inode_t *inode,
+ unsigned char *sources, unsigned char *sinks,
+ unsigned char *healed_sinks, int source,
+ unsigned char *locked_on, struct afr_reply *replies,
+ void *gfid_req, dict_t *xdata)
{
- int gfid_idx = -1;
- int ret = -1;
- void *gfid = NULL;
- gf_boolean_t source_is_empty = _gf_true;
- gf_boolean_t need_heal = _gf_false;
- gf_boolean_t is_gfid_absent = _gf_false;
-
- need_heal = afr_selfheal_name_need_heal_check (this, replies);
- if (!need_heal)
- return 0;
-
- source_is_empty = afr_selfheal_name_source_empty_check (this, replies,
- sources,
- source);
- if (source_is_empty)
- return __afr_selfheal_name_expunge (this, parent, pargfid,
- bname, inode, replies);
-
- ret = afr_selfheal_name_type_mismatch_check (this, replies, source,
- sources, pargfid, bname);
- if (ret)
- return ret;
-
- ret = afr_selfheal_name_gfid_mismatch_check (this, replies, source,
- sources, &gfid_idx,
- pargfid, bname);
- if (ret)
- return ret;
-
- if (gfid_idx == -1) {
- if (!gfid_req || gf_uuid_is_null (gfid_req))
- return -1;
- gfid = gfid_req;
- } else {
- gfid = &replies[gfid_idx].poststat.ia_gfid;
- }
-
- is_gfid_absent = (gfid_idx == -1) ? _gf_true : _gf_false;
- ret = __afr_selfheal_assign_gfid (this, parent, pargfid, bname, inode,
- replies, gfid, locked_on,
- is_gfid_absent);
- if (ret)
- return ret;
-
- if (gfid_idx == -1) {
- gfid_idx = afr_selfheal_gfid_idx_get (this, replies, sources);
- if (gfid_idx == -1)
- return -1;
- }
+ int gfid_idx = -1;
+ int ret = -1;
+ void *gfid = NULL;
+ gf_boolean_t source_is_empty = _gf_true;
+ gf_boolean_t need_heal = _gf_false;
+ gf_boolean_t is_gfid_absent = _gf_false;
+
+ need_heal = afr_selfheal_name_need_heal_check(this, replies);
+ if (!need_heal)
+ return 0;
- return __afr_selfheal_name_impunge (frame, this, parent, pargfid,
- bname, inode,
- replies, gfid_idx);
+ source_is_empty = afr_selfheal_name_source_empty_check(this, replies,
+ sources, source);
+ if (source_is_empty) {
+ ret = __afr_selfheal_name_expunge(this, parent, pargfid, bname, inode,
+ replies);
+ if (ret == -EIO)
+ ret = -1;
+ return ret;
+ }
+
+ ret = afr_selfheal_name_type_mismatch_check(this, replies, source, sources,
+ pargfid, bname);
+ if (ret)
+ return ret;
+
+ ret = afr_selfheal_name_gfid_mismatch_check(this, replies, source, sources,
+ &gfid_idx, pargfid, bname,
+ inode, locked_on, xdata);
+ if (ret)
+ return ret;
+
+ if (gfid_idx == -1) {
+ if (!gfid_req || gf_uuid_is_null(gfid_req))
+ return -1;
+ gfid = gfid_req;
+ } else {
+ gfid = &replies[gfid_idx].poststat.ia_gfid;
+ if (source == -1)
+ /* Either entry split-brain or dirty xattrs are present on parent.*/
+ source = gfid_idx;
+ }
+
+ is_gfid_absent = (gfid_idx == -1) ? _gf_true : _gf_false;
+ ret = __afr_selfheal_assign_gfid(this, parent, pargfid, bname, inode,
+ replies, gfid, locked_on, source, sources,
+ is_gfid_absent, &gfid_idx);
+ if (ret || (gfid_idx < 0))
+ return ret;
+
+ ret = __afr_selfheal_name_impunge(frame, this, parent, pargfid, bname,
+ inode, replies, gfid_idx);
+ if (ret == -EIO)
+ ret = -1;
+
+ return ret;
}
-
int
-__afr_selfheal_name_finalize_source (xlator_t *this, unsigned char *sources,
- unsigned char *healed_sinks,
- unsigned char *locked_on,
- struct afr_reply *replies,
- uint64_t *witness)
+__afr_selfheal_name_finalize_source(xlator_t *this, unsigned char *sources,
+ unsigned char *healed_sinks,
+ unsigned char *locked_on, uint64_t *witness)
{
- int i = 0;
- afr_private_t *priv = NULL;
- int source = -1;
- int sources_count = 0;
-
- priv = this->private;
-
- sources_count = AFR_COUNT (sources, priv->child_count);
-
- if ((AFR_CMP (locked_on, healed_sinks, priv->child_count) == 0)
- || !sources_count || afr_does_witness_exist (this, witness)) {
- memset (sources, 0, sizeof (*sources) * priv->child_count);
- afr_mark_active_sinks (this, sources, locked_on, healed_sinks);
- return -1;
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (sources[i]) {
- source = i;
- break;
- }
- }
-
- return source;
+ int i = 0;
+ afr_private_t *priv = NULL;
+ int source = -1;
+ int sources_count = 0;
+
+ priv = this->private;
+
+ sources_count = AFR_COUNT(sources, priv->child_count);
+
+ if ((AFR_CMP(locked_on, healed_sinks, priv->child_count) == 0) ||
+ !sources_count || afr_does_witness_exist(this, witness)) {
+ memset(sources, 0, sizeof(*sources) * priv->child_count);
+ afr_mark_active_sinks(this, sources, locked_on, healed_sinks);
+ return -1;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (sources[i]) {
+ source = i;
+ break;
+ }
+ }
+
+ return source;
}
int
-__afr_selfheal_name_prepare (call_frame_t *frame, xlator_t *this, inode_t *parent,
- uuid_t pargfid, unsigned char *locked_on,
- unsigned char *sources, unsigned char *sinks,
- unsigned char *healed_sinks, int *source_p)
+__afr_selfheal_name_prepare(call_frame_t *frame, xlator_t *this,
+ inode_t *parent, uuid_t pargfid,
+ unsigned char *locked_on, unsigned char *sources,
+ unsigned char *sinks, unsigned char *healed_sinks,
+ int *source_p)
{
- int ret = -1;
- int source = -1;
- afr_private_t *priv = NULL;
- struct afr_reply *replies = NULL;
- uint64_t *witness = NULL;
-
- priv = this->private;
-
- replies = alloca0 (priv->child_count * sizeof(*replies));
-
- ret = afr_selfheal_unlocked_discover (frame, parent, pargfid, replies);
- if (ret)
- goto out;
-
- witness = alloca0 (sizeof (*witness) * priv->child_count);
- ret = afr_selfheal_find_direction (frame, this, replies,
- AFR_ENTRY_TRANSACTION,
- locked_on, sources, sinks, witness);
- if (ret)
- goto out;
-
- /* Initialize the healed_sinks[] array optimistically to
- the intersection of to-be-healed (i.e sinks[]) and
- the list of servers which are up (i.e locked_on[]).
-
- As we encounter failures in the healing process, we
- will unmark the respective servers in the healed_sinks[]
- array.
- */
- AFR_INTERSECT (healed_sinks, sinks, locked_on, priv->child_count);
-
- source = __afr_selfheal_name_finalize_source (this, sources,
- healed_sinks,
- locked_on, replies,
- witness);
- if (source < 0) {
- /* If source is < 0 (typically split-brain), we perform a
- conservative merge of entries rather than erroring out */
- }
- *source_p = source;
+ int ret = -1;
+ int source = -1;
+ afr_private_t *priv = NULL;
+ struct afr_reply *replies = NULL;
+ uint64_t *witness = NULL;
+
+ priv = this->private;
+
+ replies = alloca0(priv->child_count * sizeof(*replies));
+
+ ret = afr_selfheal_unlocked_discover(frame, parent, pargfid, replies);
+ if (ret)
+ goto out;
+
+ witness = alloca0(sizeof(*witness) * priv->child_count);
+ ret = afr_selfheal_find_direction(frame, this, replies,
+ AFR_ENTRY_TRANSACTION, locked_on, sources,
+ sinks, witness, NULL);
+ if (ret)
+ goto out;
+
+ /* Initialize the healed_sinks[] array optimistically to
+ the intersection of to-be-healed (i.e sinks[]) and
+ the list of servers which are up (i.e locked_on[]).
+
+ As we encounter failures in the healing process, we
+ will unmark the respective servers in the healed_sinks[]
+ array.
+ */
+ AFR_INTERSECT(healed_sinks, sinks, locked_on, priv->child_count);
+
+ source = __afr_selfheal_name_finalize_source(this, sources, healed_sinks,
+ locked_on, witness);
+ if (source < 0) {
+ /* If source is < 0 (typically split-brain), we perform a
+ conservative merge of entries rather than erroring out */
+ }
+ *source_p = source;
out:
- if (replies)
- afr_replies_wipe (replies, priv->child_count);
+ if (replies)
+ afr_replies_wipe(replies, priv->child_count);
- return ret;
+ return ret;
}
-
int
-afr_selfheal_name_do (call_frame_t *frame, xlator_t *this, inode_t *parent,
- uuid_t pargfid, const char *bname, void *gfid_req)
+afr_selfheal_name_do(call_frame_t *frame, xlator_t *this, inode_t *parent,
+ uuid_t pargfid, const char *bname, void *gfid_req,
+ dict_t *xdata)
{
- afr_private_t *priv = NULL;
- unsigned char *sources = NULL;
- unsigned char *sinks = NULL;
- unsigned char *healed_sinks = NULL;
- unsigned char *locked_on = NULL;
- int source = -1;
- struct afr_reply *replies = NULL;
- int ret = -1;
- inode_t *inode = NULL;
- dict_t *xattr = NULL;
-
- xattr = dict_new ();
- if (!xattr)
- return -ENOMEM;
-
- ret = dict_set_int32 (xattr, GF_GFIDLESS_LOOKUP, 1);
- if (ret) {
- dict_destroy (xattr);
- return -1;
+ afr_private_t *priv = NULL;
+ unsigned char *sources = NULL;
+ unsigned char *sinks = NULL;
+ unsigned char *healed_sinks = NULL;
+ unsigned char *locked_on = NULL;
+ int source = -1;
+ struct afr_reply *replies = NULL;
+ int ret = -1;
+ inode_t *inode = NULL;
+ dict_t *xattr = NULL;
+
+ xattr = dict_new();
+ if (!xattr)
+ return -ENOMEM;
+
+ ret = dict_set_int32_sizen(xattr, GF_GFIDLESS_LOOKUP, 1);
+ if (ret) {
+ dict_unref(xattr);
+ return -1;
+ }
+
+ priv = this->private;
+
+ locked_on = alloca0(priv->child_count);
+ sources = alloca0(priv->child_count);
+ sinks = alloca0(priv->child_count);
+ healed_sinks = alloca0(priv->child_count);
+
+ replies = alloca0(priv->child_count * sizeof(*replies));
+
+ ret = afr_selfheal_entrylk(frame, this, parent, this->name, bname,
+ locked_on);
+ {
+ if (ret < priv->child_count) {
+ ret = -ENOTCONN;
+ goto unlock;
}
- priv = this->private;
-
- locked_on = alloca0 (priv->child_count);
- sources = alloca0 (priv->child_count);
- sinks = alloca0 (priv->child_count);
- healed_sinks = alloca0 (priv->child_count);
-
- replies = alloca0 (priv->child_count * sizeof(*replies));
-
- ret = afr_selfheal_entrylk (frame, this, parent, this->name, bname,
- locked_on);
- {
- if (ret < AFR_SH_MIN_PARTICIPANTS) {
- ret = -ENOTCONN;
- goto unlock;
- }
-
- ret = __afr_selfheal_name_prepare (frame, this, parent, pargfid,
- locked_on, sources, sinks,
- healed_sinks, &source);
- if (ret)
- goto unlock;
-
- inode = afr_selfheal_unlocked_lookup_on (frame, parent, bname,
- replies, locked_on,
- xattr);
- if (!inode) {
- ret = -ENOMEM;
- goto unlock;
- }
-
- ret = __afr_selfheal_name_do (frame, this, parent, pargfid,
- bname, inode, sources, sinks,
- healed_sinks, source, locked_on,
- replies, gfid_req);
- }
+ ret = __afr_selfheal_name_prepare(frame, this, parent, pargfid,
+ locked_on, sources, sinks,
+ healed_sinks, &source);
+ if (ret)
+ goto unlock;
+
+ inode = afr_selfheal_unlocked_lookup_on(frame, parent, bname, replies,
+ locked_on, xattr);
+ if (!inode) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
+
+ ret = __afr_selfheal_name_do(frame, this, parent, pargfid, bname, inode,
+ sources, sinks, healed_sinks, source,
+ locked_on, replies, gfid_req, xdata);
+ }
unlock:
- afr_selfheal_unentrylk (frame, this, parent, this->name, bname,
- locked_on);
- if (inode)
- inode_unref (inode);
+ afr_selfheal_unentrylk(frame, this, parent, this->name, bname, locked_on,
+ NULL);
+ if (inode)
+ inode_unref(inode);
- if (replies)
- afr_replies_wipe (replies, priv->child_count);
- if (xattr)
- dict_unref (xattr);
+ if (replies)
+ afr_replies_wipe(replies, priv->child_count);
+ if (xattr)
+ dict_unref(xattr);
- return ret;
+ return ret;
}
-
int
-afr_selfheal_name_unlocked_inspect (call_frame_t *frame, xlator_t *this,
- inode_t *parent, uuid_t pargfid,
- const char *bname, gf_boolean_t *need_heal)
+afr_selfheal_name_unlocked_inspect(call_frame_t *frame, xlator_t *this,
+ inode_t *parent, uuid_t pargfid,
+ const char *bname, gf_boolean_t *need_heal)
{
- afr_private_t *priv = NULL;
- int i = 0;
- struct afr_reply *replies = NULL;
- inode_t *inode = NULL;
- int first_idx = -1;
-
- priv = this->private;
-
- replies = alloca0 (sizeof (*replies) * priv->child_count);
-
- inode = afr_selfheal_unlocked_lookup_on (frame, parent, bname,
- replies, priv->child_up, NULL);
- if (!inode)
- return -ENOMEM;
-
- for (i = 0; i < priv->child_count; i++) {
- if (!replies[i].valid)
- continue;
-
- if ((replies[i].op_ret == -1) &&
- (replies[i].op_errno == ENODATA))
- *need_heal = _gf_true;
-
- if (first_idx == -1) {
- first_idx = i;
- continue;
- }
-
- if (replies[i].op_ret != replies[first_idx].op_ret)
- *need_heal = _gf_true;
-
- if (gf_uuid_compare (replies[i].poststat.ia_gfid,
- replies[first_idx].poststat.ia_gfid))
- *need_heal = _gf_true;
- }
-
- if (inode)
- inode_unref (inode);
- if (replies)
- afr_replies_wipe (replies, priv->child_count);
- return 0;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ struct afr_reply *replies = NULL;
+ inode_t *inode = NULL;
+ int first_idx = -1;
+ afr_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ replies = alloca0(sizeof(*replies) * priv->child_count);
+
+ inode = afr_selfheal_unlocked_lookup_on(frame, parent, bname, replies,
+ local->child_up, NULL);
+ if (!inode)
+ return -ENOMEM;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
+
+ if ((replies[i].op_ret == -1) && (replies[i].op_errno == ENODATA)) {
+ *need_heal = _gf_true;
+ break;
+ }
+
+ if (first_idx == -1) {
+ first_idx = i;
+ continue;
+ }
+
+ if (replies[i].op_ret != replies[first_idx].op_ret) {
+ *need_heal = _gf_true;
+ break;
+ }
+
+ if (gf_uuid_compare(replies[i].poststat.ia_gfid,
+ replies[first_idx].poststat.ia_gfid)) {
+ *need_heal = _gf_true;
+ break;
+ }
+ }
+
+ if (inode)
+ inode_unref(inode);
+ if (replies)
+ afr_replies_wipe(replies, priv->child_count);
+ return 0;
}
int
-afr_selfheal_name (xlator_t *this, uuid_t pargfid, const char *bname,
- void *gfid_req)
+afr_selfheal_name(xlator_t *this, uuid_t pargfid, const char *bname,
+ void *gfid_req, dict_t *xdata)
{
- inode_t *parent = NULL;
- call_frame_t *frame = NULL;
- int ret = -1;
- gf_boolean_t need_heal = _gf_false;
-
- parent = afr_inode_find (this, pargfid);
- if (!parent)
- goto out;
-
- frame = afr_frame_create (this);
- if (!frame)
- goto out;
-
- ret = afr_selfheal_name_unlocked_inspect (frame, this, parent, pargfid,
- bname, &need_heal);
- if (ret)
- goto out;
-
- if (need_heal) {
- ret = afr_selfheal_name_do (frame, this, parent, pargfid, bname,
- gfid_req);
- if (ret)
- goto out;
- }
+ inode_t *parent = NULL;
+ call_frame_t *frame = NULL;
+ int ret = -1;
+ gf_boolean_t need_heal = _gf_false;
+
+ parent = afr_inode_find(this, pargfid);
+ if (!parent)
+ goto out;
+
+ frame = afr_frame_create(this, NULL);
+ if (!frame)
+ goto out;
+
+ ret = afr_selfheal_name_unlocked_inspect(frame, this, parent, pargfid,
+ bname, &need_heal);
+ if (ret)
+ goto out;
+
+ if (need_heal) {
+ ret = afr_selfheal_name_do(frame, this, parent, pargfid, bname,
+ gfid_req, xdata);
+ if (ret)
+ goto out;
+ }
- ret = 0;
+ ret = 0;
out:
- if (parent)
- inode_unref (parent);
- if (frame)
- AFR_STACK_DESTROY (frame);
+ if (parent)
+ inode_unref(parent);
+ if (frame)
+ AFR_STACK_DESTROY(frame);
- return ret;
+ return ret;
}
diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h
index 32be2480234..48e6dbcfb18 100644
--- a/xlators/cluster/afr/src/afr-self-heal.h
+++ b/xlators/cluster/afr/src/afr-self-heal.h
@@ -8,244 +8,370 @@
cases as published by the Free Software Foundation.
*/
-
#ifndef _AFR_SELFHEAL_H
#define _AFR_SELFHEAL_H
-#define AFR_SH_MIN_PARTICIPANTS 2
-
/* Perform fop on all UP subvolumes and wait for all callbacks to return */
-#define AFR_ONALL(frame, rfn, fop, args ...) do { \
- afr_local_t *__local = frame->local; \
- afr_private_t *__priv = frame->this->private; \
- int __i = 0, __count = 0; \
- \
- afr_local_replies_wipe (__local, __priv); \
- \
- for (__i = 0; __i < __priv->child_count; __i++) { \
- if (!__priv->child_up[__i]) continue; \
- STACK_WIND_COOKIE (frame, rfn, (void *)(long) __i, \
- __priv->children[__i], \
- __priv->children[__i]->fops->fop, args); \
- __count++; \
- } \
- syncbarrier_wait (&__local->barrier, __count); \
- } while (0)
-
+#define AFR_ONALL(frame, rfn, fop, args...) \
+ do { \
+ afr_local_t *__local = frame->local; \
+ afr_private_t *__priv = frame->this->private; \
+ int __i = 0, __count = 0; \
+ unsigned char *__child_up = alloca(__priv->child_count); \
+ \
+ memcpy(__child_up, __priv->child_up, \
+ sizeof(*__child_up) * __priv->child_count); \
+ __count = AFR_COUNT(__child_up, __priv->child_count); \
+ \
+ __local->barrier.waitfor = __count; \
+ afr_local_replies_wipe(__local, __priv); \
+ \
+ for (__i = 0; __i < __priv->child_count; __i++) { \
+ if (!__child_up[__i]) \
+ continue; \
+ STACK_WIND_COOKIE(frame, rfn, (void *)(long)__i, \
+ __priv->children[__i], \
+ __priv->children[__i]->fops->fop, args); \
+ } \
+ syncbarrier_wait(&__local->barrier, __count); \
+ } while (0)
/* Perform fop on all subvolumes represented by list[] array and wait
for all callbacks to return */
-#define AFR_ONLIST(list, frame, rfn, fop, args ...) do { \
- afr_local_t *__local = frame->local; \
- afr_private_t *__priv = frame->this->private; \
- int __i = 0, __count = 0; \
- \
- afr_local_replies_wipe (__local, __priv); \
- \
- for (__i = 0; __i < __priv->child_count; __i++) { \
- if (!list[__i]) continue; \
- STACK_WIND_COOKIE (frame, rfn, (void *)(long) __i, \
- __priv->children[__i], \
- __priv->children[__i]->fops->fop, args); \
- __count++; \
- } \
- syncbarrier_wait (&__local->barrier, __count); \
- } while (0)
-
-
-#define AFR_SEQ(frame, rfn, fop, args ...) do { \
- afr_local_t *__local = frame->local; \
- afr_private_t *__priv = frame->this->private; \
- int __i = 0; \
- \
- afr_local_replies_wipe (__local, __priv); \
- \
- for (__i = 0; __i < __priv->child_count; __i++) { \
- if (!__priv->child_up[__i]) continue; \
- STACK_WIND_COOKIE (frame, rfn, (void *)(long) __i, \
- __priv->children[__i], \
- __priv->children[__i]->fops->fop, args); \
- syncbarrier_wait (&__local->barrier, 1); \
- } \
- } while (0)
-
-
-#define ALLOC_MATRIX(n, type) ({type **__ptr = NULL; \
- int __i; \
- __ptr = alloca0 (n * sizeof(type *)); \
- for (__i = 0; __i < n; __i++) __ptr[__i] = alloca0 (n * sizeof(type)); \
- __ptr;})
-
+#define AFR_ONLIST(list, frame, rfn, fop, args...) \
+ do { \
+ afr_local_t *__local = frame->local; \
+ afr_private_t *__priv = frame->this->private; \
+ int __i = 0; \
+ int __count = 0; \
+ unsigned char *__list = alloca(__priv->child_count); \
+ \
+ memcpy(__list, list, sizeof(*__list) * __priv->child_count); \
+ __count = AFR_COUNT(__list, __priv->child_count); \
+ __local->barrier.waitfor = __count; \
+ afr_local_replies_wipe(__local, __priv); \
+ \
+ for (__i = 0; __i < __priv->child_count; __i++) { \
+ if (!__list[__i]) \
+ continue; \
+ STACK_WIND_COOKIE(frame, rfn, (void *)(long)__i, \
+ __priv->children[__i], \
+ __priv->children[__i]->fops->fop, args); \
+ } \
+ syncbarrier_wait(&__local->barrier, __count); \
+ } while (0)
+
+#define AFR_SEQ(frame, rfn, fop, args...) \
+ do { \
+ afr_local_t *__local = frame->local; \
+ afr_private_t *__priv = frame->this->private; \
+ int __i = 0; \
+ \
+ afr_local_replies_wipe(__local, __priv); \
+ \
+ for (__i = 0; __i < __priv->child_count; __i++) { \
+ if (!__priv->child_up[__i]) \
+ continue; \
+ STACK_WIND_COOKIE(frame, rfn, (void *)(long)__i, \
+ __priv->children[__i], \
+ __priv->children[__i]->fops->fop, args); \
+ syncbarrier_wait(&__local->barrier, 1); \
+ } \
+ } while (0)
+
+#define ALLOC_MATRIX(n, type) \
+ ({ \
+ int __i; \
+ type **__ptr = alloca(n * sizeof(type *)); \
+ \
+ for (__i = 0; __i < n; __i++) \
+ __ptr[__i] = alloca0(n * sizeof(type)); \
+ __ptr; \
+ })
+
+#define IA_EQUAL(f, s, field) \
+ (memcmp(&(f.ia_##field), &(s.ia_##field), sizeof(s.ia_##field)) == 0)
+
+#define SBRAIN_HEAL_NO_GO_MSG \
+ "Failed to obtain replies from all bricks of " \
+ "the replica (are they up?). Cannot resolve split-brain."
+#define SFILE_NOT_IN_SPLIT_BRAIN "File not in split-brain"
+#define SNO_BIGGER_FILE "No bigger file"
+#define SNO_DIFF_IN_MTIME "No difference in mtime"
+#define SUSE_SOURCE_BRICK_TO_HEAL \
+ "Use source-brick option to heal metadata" \
+ " split-brain"
+#define SINVALID_BRICK_NAME "Invalid brick name"
+#define SBRICK_IS_NOT_UP "Brick is not up"
+#define SBRICK_NOT_CONNECTED "Brick is not connected"
+#define SLESS_THAN2_BRICKS_in_REP "< 2 bricks in replica are up"
+#define SBRICK_IS_REMOTE "Brick is remote"
+#define SSTARTED_SELF_HEAL "Started self-heal"
+#define SOP_NOT_SUPPORTED "Operation Not Supported"
+#define SFILE_NOT_UNDER_DATA \
+ "The file is not under data or metadata " \
+ "split-brain"
+#define SFILE_NOT_IN_SPLIT_BRAIN "File not in split-brain"
+#define SALL_BRICKS_UP_TO_RESOLVE \
+ "All the bricks should be up to resolve the" \
+ " gfid split brain"
+#define SERROR_GETTING_SRC_BRICK "Error getting the source brick"
+int
+afr_selfheal(xlator_t *this, uuid_t gfid);
-#define IA_EQUAL(f,s,field) (memcmp (&(f.ia_##field), &(s.ia_##field), sizeof (s.ia_##field)) == 0)
+gf_boolean_t
+afr_throttled_selfheal(call_frame_t *frame, xlator_t *this);
+int
+afr_selfheal_name(xlator_t *this, uuid_t gfid, const char *name, void *gfid_req,
+ dict_t *xdata);
int
-afr_selfheal (xlator_t *this, uuid_t gfid);
+afr_selfheal_data(call_frame_t *frame, xlator_t *this, fd_t *fd);
int
-afr_selfheal_name (xlator_t *this, uuid_t gfid, const char *name,
- void *gfid_req);
+afr_selfheal_metadata(call_frame_t *frame, xlator_t *this, inode_t *inode);
int
-afr_selfheal_data (call_frame_t *frame, xlator_t *this, inode_t *inode);
+afr_selfheal_entry(call_frame_t *frame, xlator_t *this, inode_t *inode);
int
-afr_selfheal_metadata (call_frame_t *frame, xlator_t *this, inode_t *inode);
+afr_lookup_and_heal_gfid(xlator_t *this, inode_t *parent, const char *name,
+ inode_t *inode, struct afr_reply *replies, int source,
+ unsigned char *sources, void *gfid, int *gfid_idx);
int
-afr_selfheal_entry (call_frame_t *frame, xlator_t *this, inode_t *inode);
+afr_selfheal_inodelk(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, off_t off, size_t size,
+ unsigned char *locked_on);
+int
+afr_selfheal_tryinodelk(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, off_t off, size_t size,
+ unsigned char *locked_on);
int
-afr_selfheal_inodelk (call_frame_t *frame, xlator_t *this, inode_t *inode,
- char *dom, off_t off, size_t size,
- unsigned char *locked_on);
+afr_selfheal_tie_breaker_inodelk(call_frame_t *frame, xlator_t *this,
+ inode_t *inode, char *dom, off_t off,
+ size_t size, unsigned char *locked_on);
int
-afr_selfheal_tryinodelk (call_frame_t *frame, xlator_t *this, inode_t *inode,
- char *dom, off_t off, size_t size,
- unsigned char *locked_on);
+afr_selfheal_uninodelk(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, off_t off, size_t size,
+ const unsigned char *locked_on);
int
-afr_selfheal_uninodelk (call_frame_t *frame, xlator_t *this, inode_t *inode,
- char *dom, off_t off, size_t size,
- const unsigned char *locked_on);
+afr_selfheal_entrylk(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, const char *name, unsigned char *locked_on);
int
-afr_selfheal_entrylk (call_frame_t *frame, xlator_t *this, inode_t *inode,
- char *dom, const char *name, unsigned char *locked_on);
+afr_selfheal_tryentrylk(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, const char *name, unsigned char *locked_on);
int
-afr_selfheal_tryentrylk (call_frame_t *frame, xlator_t *this, inode_t *inode,
- char *dom, const char *name, unsigned char *locked_on);
+afr_selfheal_tie_breaker_entrylk(call_frame_t *frame, xlator_t *this,
+ inode_t *inode, char *dom, const char *name,
+ unsigned char *locked_on);
int
-afr_selfheal_unentrylk (call_frame_t *frame, xlator_t *this, inode_t *inode,
- char *dom, const char *name, unsigned char *locked_on);
+afr_selfheal_unentrylk(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, const char *name, unsigned char *locked_on,
+ dict_t *xdata);
int
-afr_selfheal_unlocked_discover (call_frame_t *frame, inode_t *inode,
- uuid_t gfid, struct afr_reply *replies);
+afr_selfheal_unlocked_discover(call_frame_t *frame, inode_t *inode, uuid_t gfid,
+ struct afr_reply *replies);
+int
+afr_selfheal_unlocked_discover_on(call_frame_t *frame, inode_t *inode,
+ uuid_t gfid, struct afr_reply *replies,
+ unsigned char *discover_on, dict_t *dict);
inode_t *
-afr_selfheal_unlocked_lookup_on (call_frame_t *frame, inode_t *parent,
- const char *name, struct afr_reply *replies,
- unsigned char *lookup_on, dict_t *xattr);
+afr_selfheal_unlocked_lookup_on(call_frame_t *frame, inode_t *parent,
+ const char *name, struct afr_reply *replies,
+ unsigned char *lookup_on, dict_t *xattr);
+
+int
+afr_selfheal_find_direction(call_frame_t *frame, xlator_t *this,
+ struct afr_reply *replies,
+ afr_transaction_type type, unsigned char *locked_on,
+ unsigned char *sources, unsigned char *sinks,
+ uint64_t *witness, unsigned char *flag);
+int
+afr_selfheal_fill_matrix(xlator_t *this, int **matrix, int subvol, int idx,
+ dict_t *xdata);
int
-afr_selfheal_find_direction (call_frame_t *frame, xlator_t *this,
- struct afr_reply *replies,
- afr_transaction_type type,
- unsigned char *locked_on, unsigned char *sources,
- unsigned char *sinks, uint64_t *witness);
+afr_selfheal_extract_xattr(xlator_t *this, struct afr_reply *replies,
+ afr_transaction_type type, int *dirty, int **matrix);
int
-afr_selfheal_extract_xattr (xlator_t *this, struct afr_reply *replies,
- afr_transaction_type type, int *dirty, int **matrix);
+afr_sh_generic_fop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata);
int
-afr_selfheal_undo_pending (call_frame_t *frame, xlator_t *this, inode_t *inode,
- unsigned char *sources, unsigned char *sinks,
- unsigned char *healed_sinks, afr_transaction_type type,
- struct afr_reply *replies, unsigned char *locked_on);
+afr_selfheal_restore_time(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ int source, unsigned char *healed_sinks,
+ struct afr_reply *replies);
+int
+afr_selfheal_undo_pending(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ unsigned char *sources, unsigned char *sinks,
+ unsigned char *healed_sinks,
+ unsigned char *undid_pending,
+ afr_transaction_type type, struct afr_reply *replies,
+ unsigned char *locked_on);
int
-afr_selfheal_recreate_entry (xlator_t *this, int dst, int source, inode_t *dir,
- const char *name, inode_t *inode,
- struct afr_reply *replies,
- unsigned char *newentry);
+afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source,
+ unsigned char *sources, inode_t *dir,
+ const char *name, inode_t *inode,
+ struct afr_reply *replies);
int
-afr_selfheal_post_op (call_frame_t *frame, xlator_t *this, inode_t *inode,
- int subvol, dict_t *xattr);
+afr_selfheal_post_op(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ int subvol, dict_t *xattr, dict_t *xdata);
call_frame_t *
-afr_frame_create (xlator_t *this);
+afr_frame_create(xlator_t *this, int32_t *op_errno);
inode_t *
-afr_inode_find (xlator_t *this, uuid_t gfid);
+afr_inode_find(xlator_t *this, uuid_t gfid);
int
-afr_selfheal_discover_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, inode_t *inode,
- struct iatt *buf, dict_t *xdata,
- struct iatt *parbuf);
+afr_selfheal_discover_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata, struct iatt *parbuf);
+void
+afr_reply_copy(struct afr_reply *dst, struct afr_reply *src);
void
-afr_replies_copy (struct afr_reply *dst, struct afr_reply *src, int count);
+afr_replies_copy(struct afr_reply *dst, struct afr_reply *src, int count);
int
-afr_selfheal_newentry_mark (call_frame_t *frame, xlator_t *this, inode_t *inode,
- int source, struct afr_reply *replies,
- unsigned char *sources, unsigned char *newentry);
-
-inode_t*
-afr_inode_link (inode_t *inode, struct iatt *iatt);
+afr_selfheal_newentry_mark(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ int source, struct afr_reply *replies,
+ unsigned char *sources, unsigned char *newentry);
unsigned int
-afr_success_count (struct afr_reply *replies, unsigned int count);
+afr_success_count(struct afr_reply *replies, unsigned int count);
void
-afr_log_selfheal (uuid_t gfid, xlator_t *this, int ret, char *type,
- int source, unsigned char *healed_sinks);
+afr_log_selfheal(uuid_t gfid, xlator_t *this, int ret, char *type, int source,
+ unsigned char *sources, unsigned char *healed_sinks);
void
-afr_mark_largest_file_as_source (xlator_t *this, unsigned char *sources,
- struct afr_reply *replies);
+afr_mark_largest_file_as_source(xlator_t *this, unsigned char *sources,
+ struct afr_reply *replies);
void
-afr_mark_active_sinks (xlator_t *this, unsigned char *sources,
- unsigned char *locked_on, unsigned char *sinks);
+afr_mark_active_sinks(xlator_t *this, unsigned char *sources,
+ unsigned char *locked_on, unsigned char *sinks);
+
+gf_boolean_t
+afr_dict_contains_heal_op(call_frame_t *frame);
gf_boolean_t
-afr_dict_contains_heal_op (call_frame_t *frame);
+afr_can_decide_split_brain_source_sinks(struct afr_reply *replies,
+ int child_count);
+int
+afr_mark_split_brain_source_sinks(
+ call_frame_t *frame, xlator_t *this, inode_t *inode, unsigned char *sources,
+ unsigned char *sinks, unsigned char *healed_sinks, unsigned char *locked_on,
+ struct afr_reply *replies, afr_transaction_type type);
+
+int
+afr_sh_get_fav_by_policy(xlator_t *this, struct afr_reply *replies,
+ inode_t *inode, char **policy_str);
int
-afr_mark_split_brain_source_sinks (call_frame_t *frame, xlator_t *this,
- unsigned char *sources,
- unsigned char *sinks,
- unsigned char *healed_sinks,
- unsigned char *locked_on,
- struct afr_reply *replies,
- afr_transaction_type type);
+_afr_fav_child_reset_sink_xattrs(call_frame_t *frame, xlator_t *this,
+ inode_t *inode, int source,
+ unsigned char *healed_sinks,
+ unsigned char *undid_pending,
+ afr_transaction_type type,
+ unsigned char *locked_on,
+ struct afr_reply *replies);
int
-afr_get_child_index_from_name (xlator_t *this, char *name);
+afr_get_child_index_from_name(xlator_t *this, char *name);
gf_boolean_t
-afr_does_witness_exist (xlator_t *this, uint64_t *witness);
+afr_does_witness_exist(xlator_t *this, uint64_t *witness);
int
-__afr_selfheal_data_prepare (call_frame_t *frame, xlator_t *this,
+__afr_selfheal_data_prepare(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ unsigned char *locked_on, unsigned char *sources,
+ unsigned char *sinks, unsigned char *healed_sinks,
+ unsigned char *undid_pending,
+ struct afr_reply *replies, unsigned char *flag);
+
+int
+__afr_selfheal_metadata_prepare(call_frame_t *frame, xlator_t *this,
+ inode_t *inode, unsigned char *locked_on,
+ unsigned char *sources, unsigned char *sinks,
+ unsigned char *healed_sinks,
+ unsigned char *undid_pending,
+ struct afr_reply *replies, unsigned char *flag);
+int
+__afr_selfheal_entry_prepare(call_frame_t *frame, xlator_t *this,
inode_t *inode, unsigned char *locked_on,
- unsigned char *sources,
- unsigned char *sinks, unsigned char *healed_sinks,
- struct afr_reply *replies);
+ unsigned char *sources, unsigned char *sinks,
+ unsigned char *healed_sinks,
+ struct afr_reply *replies, int *source_p,
+ unsigned char *flag);
int
-__afr_selfheal_metadata_prepare (call_frame_t *frame, xlator_t *this,
- inode_t *inode, unsigned char *locked_on,
- unsigned char *sources,
- unsigned char *sinks,
- unsigned char *healed_sinks,
- struct afr_reply *replies);
+afr_selfheal_unlocked_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid,
+ inode_t **link_inode, gf_boolean_t *data_selfheal,
+ gf_boolean_t *metadata_selfheal,
+ gf_boolean_t *entry_selfheal,
+ struct afr_reply *replies);
+
+int
+afr_selfheal_do(call_frame_t *frame, xlator_t *this, uuid_t gfid);
+
+int
+afr_selfheal_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata);
+
+int
+afr_locked_fill(call_frame_t *frame, xlator_t *this, unsigned char *locked_on);
+int
+afr_choose_source_by_policy(afr_private_t *priv, unsigned char *sources,
+ afr_transaction_type type);
+
+int
+afr_selfheal_metadata_by_stbuf(xlator_t *this, struct iatt *stbuf);
+
int
-__afr_selfheal_entry_prepare (call_frame_t *frame, xlator_t *this,
- inode_t *inode, unsigned char *locked_on,
- unsigned char *sources,
- unsigned char *sinks,
- unsigned char *healed_sinks,
- struct afr_reply *replies, int *source_p);
+afr_sh_fav_by_size(xlator_t *this, struct afr_reply *replies, inode_t *inode);
+int
+afr_sh_fav_by_mtime(xlator_t *this, struct afr_reply *replies, inode_t *inode);
+int
+afr_sh_fav_by_ctime(xlator_t *this, struct afr_reply *replies, inode_t *inode);
int
-afr_selfheal_unlocked_inspect (call_frame_t *frame, xlator_t *this,
- uuid_t gfid, inode_t **link_inode,
- gf_boolean_t *data_selfheal,
- gf_boolean_t *metadata_selfheal,
- gf_boolean_t *entry_selfheal);
+afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies,
+ inode_t *inode, uuid_t pargfid, const char *bname,
+ int src_idx, int child_idx,
+ unsigned char *locked_on, int *src, dict_t *xdata);
+int
+afr_mark_source_sinks_if_file_empty(xlator_t *this, unsigned char *sources,
+ unsigned char *sinks,
+ unsigned char *healed_sinks,
+ unsigned char *locked_on,
+ struct afr_reply *replies,
+ afr_transaction_type type);
+gf_boolean_t
+afr_is_file_empty_on_all_children(afr_private_t *priv,
+ struct afr_reply *replies);
+
+int
+afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name,
+ inode_t *inode, int child, struct afr_reply *replies);
int
-afr_selfheal_do (call_frame_t *frame, xlator_t *this, uuid_t gfid);
+afr_anon_inode_create(xlator_t *this, int child, inode_t **linked_inode);
#endif /* !_AFR_SELFHEAL_H */
diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
index fe9367597a2..109fd4b7421 100644
--- a/xlators/cluster/afr/src/afr-self-heald.c
+++ b/xlators/cluster/afr/src/afr-self-heald.c
@@ -8,1189 +8,1709 @@
cases as published by the Free Software Foundation.
*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "afr.h"
#include "afr-self-heal.h"
#include "afr-self-heald.h"
#include "protocol-common.h"
-#include "syncop-utils.h"
-
-#define SHD_INODE_LRU_LIMIT 2048
-#define AFR_EH_SPLIT_BRAIN_LIMIT 1024
-#define AFR_STATISTICS_HISTORY_SIZE 50
-
-
-#define ASSERT_LOCAL(this, healer) \
- if (!afr_shd_is_subvol_local(this, healer->subvol)) { \
- healer->local = _gf_false; \
- if (safe_break (healer)) { \
- break; \
- } else { \
- continue; \
- } \
- } else { \
- healer->local = _gf_true; \
- }
-
-
-#define NTH_INDEX_HEALER(this, n) &((((afr_private_t *)this->private))->shd.index_healers[n])
-#define NTH_FULL_HEALER(this, n) &((((afr_private_t *)this->private))->shd.full_healers[n])
+#include <glusterfs/syncop-utils.h>
+#include "afr-messages.h"
+#include <glusterfs/byte-order.h>
+
+#define AFR_EH_SPLIT_BRAIN_LIMIT 1024
+#define AFR_STATISTICS_HISTORY_SIZE 50
+
+#define ASSERT_LOCAL(this, healer) \
+ if (!afr_shd_is_subvol_local(this, healer->subvol)) { \
+ healer->local = _gf_false; \
+ if (safe_break(healer)) { \
+ break; \
+ } else { \
+ continue; \
+ } \
+ } else { \
+ healer->local = _gf_true; \
+ }
+
+#define NTH_INDEX_HEALER(this, n) \
+ &((((afr_private_t *)this->private))->shd.index_healers[n])
+#define NTH_FULL_HEALER(this, n) \
+ &((((afr_private_t *)this->private))->shd.full_healers[n])
char *
-afr_subvol_name (xlator_t *this, int subvol)
+afr_subvol_name(xlator_t *this, int subvol)
{
- afr_private_t *priv = NULL;
+ afr_private_t *priv = NULL;
- priv = this->private;
- if (subvol < 0 || subvol > priv->child_count)
- return NULL;
+ priv = this->private;
+ if (subvol < 0 || subvol > priv->child_count)
+ return NULL;
- return priv->children[subvol]->name;
+ return priv->children[subvol]->name;
}
-
void
-afr_destroy_crawl_event_data (void *data)
+afr_destroy_crawl_event_data(void *data)
{
- return;
+ return;
}
-
void
-afr_destroy_shd_event_data (void *data)
+afr_destroy_shd_event_data(void *data)
{
- shd_event_t *shd_event = data;
-
- if (!shd_event)
- return;
- GF_FREE (shd_event->path);
+ shd_event_t *shd_event = data;
+ if (!shd_event)
return;
-}
+ GF_FREE(shd_event->path);
+ return;
+}
gf_boolean_t
-afr_shd_is_subvol_local (xlator_t *this, int subvol)
+afr_shd_is_subvol_local(xlator_t *this, int subvol)
{
- afr_private_t *priv = NULL;
- gf_boolean_t is_local = _gf_false;
- loc_t loc = {0, };
-
- loc.inode = this->itable->root;
- gf_uuid_copy (loc.gfid, loc.inode->gfid);
- priv = this->private;
- syncop_is_subvol_local(priv->children[subvol], &loc, &is_local);
- return is_local;
+ afr_private_t *priv = NULL;
+ gf_boolean_t is_local = _gf_false;
+ loc_t loc = {
+ 0,
+ };
+
+ loc.inode = this->itable->root;
+ gf_uuid_copy(loc.gfid, loc.inode->gfid);
+ priv = this->private;
+ syncop_is_subvol_local(priv->children[subvol], &loc, &is_local);
+ return is_local;
}
-
int
-__afr_shd_healer_wait (struct subvol_healer *healer)
+__afr_shd_healer_wait(struct subvol_healer *healer)
{
- afr_private_t *priv = NULL;
- struct timespec wait_till = {0, };
- int ret = 0;
+ afr_private_t *priv = NULL;
+ struct timespec wait_till = {
+ 0,
+ };
+ int ret = 0;
- priv = healer->this->private;
+ priv = healer->this->private;
disabled_loop:
- wait_till.tv_sec = time (NULL) + priv->shd.timeout;
+ wait_till.tv_sec = gf_time() + priv->shd.timeout;
- while (!healer->rerun) {
- ret = pthread_cond_timedwait (&healer->cond,
- &healer->mutex,
- &wait_till);
- if (ret == ETIMEDOUT)
- break;
- }
+ while (!healer->rerun) {
+ ret = pthread_cond_timedwait(&healer->cond, &healer->mutex, &wait_till);
+ if (ret == ETIMEDOUT)
+ break;
+ }
- ret = healer->rerun;
- healer->rerun = 0;
+ ret = healer->rerun;
+ healer->rerun = 0;
- if (!priv->shd.enabled)
- goto disabled_loop;
+ if (!priv->shd.enabled)
+ goto disabled_loop;
- return ret;
+ return ret;
}
-
int
-afr_shd_healer_wait (struct subvol_healer *healer)
+afr_shd_healer_wait(struct subvol_healer *healer)
{
- int ret = 0;
+ int ret = 0;
- pthread_mutex_lock (&healer->mutex);
- {
- ret = __afr_shd_healer_wait (healer);
- }
- pthread_mutex_unlock (&healer->mutex);
+ pthread_mutex_lock(&healer->mutex);
+ {
+ ret = __afr_shd_healer_wait(healer);
+ }
+ pthread_mutex_unlock(&healer->mutex);
- return ret;
+ return ret;
}
-
gf_boolean_t
-safe_break (struct subvol_healer *healer)
+safe_break(struct subvol_healer *healer)
{
- gf_boolean_t ret = _gf_false;
+ gf_boolean_t ret = _gf_false;
- pthread_mutex_lock (&healer->mutex);
- {
- if (healer->rerun)
- goto unlock;
+ pthread_mutex_lock(&healer->mutex);
+ {
+ if (healer->rerun)
+ goto unlock;
- healer->running = _gf_false;
- ret = _gf_true;
- }
+ healer->running = _gf_false;
+ ret = _gf_true;
+ }
unlock:
- pthread_mutex_unlock (&healer->mutex);
+ pthread_mutex_unlock(&healer->mutex);
- return ret;
+ return ret;
}
-
inode_t *
-afr_shd_inode_find (xlator_t *this, xlator_t *subvol, uuid_t gfid)
+afr_shd_inode_find(xlator_t *this, xlator_t *subvol, uuid_t gfid)
{
- inode_t *inode = NULL;
- int ret = 0;
- loc_t loc = {0, };
- struct iatt iatt = {0, };
-
- inode = inode_find (this->itable, gfid);
- if (inode) {
- inode_lookup (inode);
- goto out;
- }
-
- loc.inode = inode_new (this->itable);
- if (!loc.inode)
- goto out;
- gf_uuid_copy (loc.gfid, gfid);
-
- ret = syncop_lookup (subvol, &loc, &iatt, NULL, NULL, NULL);
- if (ret < 0)
- goto out;
-
- inode = inode_link (loc.inode, NULL, NULL, &iatt);
- if (inode)
- inode_lookup (inode);
+ int ret = 0;
+ uint64_t val = IA_INVAL;
+ dict_t *xdata = NULL;
+ dict_t *rsp_dict = NULL;
+ inode_t *inode = NULL;
+
+ xdata = dict_new();
+ if (!xdata)
+ goto out;
+
+ ret = dict_set_int8(xdata, GF_INDEX_IA_TYPE_GET_REQ, 1);
+ if (ret)
+ goto out;
+
+ ret = syncop_inode_find(this, subvol, gfid, &inode, xdata, &rsp_dict);
+ if (ret < 0)
+ goto out;
+
+ if (rsp_dict) {
+ ret = dict_get_uint64(rsp_dict, GF_INDEX_IA_TYPE_GET_RSP, &val);
+ if (ret)
+ goto out;
+ }
+ ret = inode_ctx_set2(inode, subvol, 0, &val);
out:
- loc_wipe (&loc);
- return inode;
+ if (ret && inode) {
+ inode_unref(inode);
+ inode = NULL;
+ }
+ if (xdata)
+ dict_unref(xdata);
+ if (rsp_dict)
+ dict_unref(rsp_dict);
+ return inode;
}
-
-inode_t*
-afr_shd_index_inode (xlator_t *this, xlator_t *subvol)
+inode_t *
+afr_shd_index_inode(xlator_t *this, xlator_t *subvol, char *vgfid)
{
- loc_t rootloc = {0, };
- inode_t *inode = NULL;
- int ret = 0;
- dict_t *xattr = NULL;
- void *index_gfid = NULL;
+ loc_t rootloc = {
+ 0,
+ };
+ inode_t *inode = NULL;
+ int ret = 0;
+ dict_t *xattr = NULL;
+ void *index_gfid = NULL;
- rootloc.inode = inode_ref (this->itable->root);
- gf_uuid_copy (rootloc.gfid, rootloc.inode->gfid);
+ rootloc.inode = inode_ref(this->itable->root);
+ gf_uuid_copy(rootloc.gfid, rootloc.inode->gfid);
- ret = syncop_getxattr (subvol, &rootloc, &xattr,
- GF_XATTROP_INDEX_GFID, NULL, NULL);
- if (ret || !xattr) {
- errno = -ret;
- goto out;
- }
+ ret = syncop_getxattr(subvol, &rootloc, &xattr, vgfid, NULL, NULL);
+ if (ret || !xattr) {
+ errno = -ret;
+ goto out;
+ }
- ret = dict_get_ptr (xattr, GF_XATTROP_INDEX_GFID, &index_gfid);
- if (ret)
- goto out;
+ ret = dict_get_ptr(xattr, vgfid, &index_gfid);
+ if (ret)
+ goto out;
- gf_log (this->name, GF_LOG_DEBUG, "index-dir gfid for %s: %s",
- subvol->name, uuid_utoa (index_gfid));
+ gf_msg_debug(this->name, 0, "%s dir gfid for %s: %s", vgfid, subvol->name,
+ uuid_utoa(index_gfid));
- inode = afr_shd_inode_find (this, subvol, index_gfid);
+ inode = afr_shd_inode_find(this, subvol, index_gfid);
out:
- loc_wipe (&rootloc);
+ loc_wipe(&rootloc);
- if (xattr)
- dict_unref (xattr);
+ if (xattr)
+ dict_unref(xattr);
- return inode;
+ return inode;
}
int
-afr_shd_index_purge (xlator_t *subvol, inode_t *inode, char *name)
+afr_shd_entry_purge(xlator_t *subvol, inode_t *inode, char *name,
+ ia_type_t type)
{
- loc_t loc = {0, };
- int ret = 0;
+ int ret = 0;
+ loc_t loc = {
+ 0,
+ };
- loc.parent = inode_ref (inode);
- loc.name = name;
+ loc.parent = inode_ref(inode);
+ loc.name = name;
- ret = syncop_unlink (subvol, &loc, NULL, NULL);
+ if (IA_ISDIR(type))
+ ret = syncop_rmdir(subvol, &loc, 1, NULL, NULL);
+ else
+ ret = syncop_unlink(subvol, &loc, NULL, NULL);
- loc_wipe (&loc);
- return ret;
+ loc_wipe(&loc);
+ return ret;
}
void
-afr_shd_zero_xattrop (xlator_t *this, uuid_t gfid)
+afr_shd_zero_xattrop(xlator_t *this, uuid_t gfid)
{
-
- call_frame_t *frame = NULL;
- inode_t *inode = NULL;
- afr_private_t *priv = NULL;
- dict_t *xattr = NULL;
- int ret = 0;
- int i = 0;
- int raw[AFR_NUM_CHANGE_LOGS] = {0};
-
- priv = this->private;
- frame = afr_frame_create (this);
- if (!frame)
- goto out;
- inode = afr_inode_find (this, gfid);
- if (!inode)
- goto out;
- xattr = dict_new();
- if (!xattr)
- goto out;
- ret = dict_set_static_bin (xattr, AFR_DIRTY, raw,
- sizeof(int) * AFR_NUM_CHANGE_LOGS);
+ call_frame_t *frame = NULL;
+ inode_t *inode = NULL;
+ afr_private_t *priv = NULL;
+ dict_t *xattr = NULL;
+ int ret = 0;
+ int i = 0;
+ int raw[AFR_NUM_CHANGE_LOGS] = {0};
+
+ priv = this->private;
+ frame = afr_frame_create(this, NULL);
+ if (!frame)
+ goto out;
+ inode = afr_inode_find(this, gfid);
+ if (!inode)
+ goto out;
+ xattr = dict_new();
+ if (!xattr)
+ goto out;
+ ret = dict_set_static_bin(xattr, AFR_DIRTY, raw,
+ sizeof(int) * AFR_NUM_CHANGE_LOGS);
+ if (ret)
+ goto out;
+ for (i = 0; i < priv->child_count; i++) {
+ ret = dict_set_static_bin(xattr, priv->pending_key[i], raw,
+ sizeof(int) * AFR_NUM_CHANGE_LOGS);
if (ret)
- goto out;
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_set_static_bin (xattr, priv->pending_key[i], raw,
- sizeof(int) * AFR_NUM_CHANGE_LOGS);
- if (ret)
- goto out;
- }
+ goto out;
+ }
- /*Send xattrop to all bricks. Doing a lookup to see if bricks are up or
- * has valid repies for this gfid seems a bit of an overkill.*/
- for (i = 0; i < priv->child_count; i++)
- afr_selfheal_post_op (frame, this, inode, i, xattr);
+ /*Send xattrop to all bricks. Doing a lookup to see if bricks are up or
+ * has valid repies for this gfid seems a bit of an overkill.*/
+ for (i = 0; i < priv->child_count; i++)
+ afr_selfheal_post_op(frame, this, inode, i, xattr, NULL);
out:
- if (frame)
- AFR_STACK_DESTROY (frame);
- if (inode)
- inode_unref (inode);
- if (xattr)
- dict_unref (xattr);
- return;
+ if (frame)
+ AFR_STACK_DESTROY(frame);
+ if (inode)
+ inode_unref(inode);
+ if (xattr)
+ dict_unref(xattr);
+ return;
}
int
-afr_shd_selfheal_name (struct subvol_healer *healer, int child, uuid_t parent,
- const char *bname)
+afr_shd_selfheal_name(struct subvol_healer *healer, int child, uuid_t parent,
+ const char *bname)
{
- int ret = -1;
+ int ret = -1;
- ret = afr_selfheal_name (THIS, parent, bname, NULL);
+ ret = afr_selfheal_name(THIS, parent, bname, NULL, NULL);
- return ret;
+ return ret;
}
int
-afr_shd_selfheal (struct subvol_healer *healer, int child, uuid_t gfid)
+afr_shd_selfheal(struct subvol_healer *healer, int child, uuid_t gfid)
{
- int ret = 0;
- eh_t *eh = NULL;
- afr_private_t *priv = NULL;
- afr_self_heald_t *shd = NULL;
- shd_event_t *shd_event = NULL;
- char *path = NULL;
- xlator_t *subvol = NULL;
- xlator_t *this = NULL;
- crawl_event_t *crawl_event = NULL;
-
- this = healer->this;
- priv = this->private;
- shd = &priv->shd;
- crawl_event = &healer->crawl_event;
-
- subvol = priv->children[child];
-
- //If this fails with ENOENT/ESTALE index is stale
- ret = syncop_gfid_to_path (this->itable, subvol, gfid, &path);
- if (ret < 0)
- return ret;
-
- ret = afr_selfheal (this, gfid);
-
- if (ret == -EIO) {
- eh = shd->split_brain;
- crawl_event->split_brain_count++;
- } else if (ret < 0) {
- crawl_event->heal_failed_count++;
- } else if (ret == 0) {
- crawl_event->healed_count++;
- }
-
- if (eh) {
- shd_event = GF_CALLOC (1, sizeof(*shd_event),
- gf_afr_mt_shd_event_t);
- if (!shd_event)
- goto out;
-
- shd_event->child = child;
- shd_event->path = path;
-
- if (eh_save_history (eh, shd_event) < 0)
- goto out;
-
- shd_event = NULL;
- path = NULL;
- }
+ int ret = 0;
+ eh_t *eh = NULL;
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ shd_event_t *shd_event = NULL;
+ char *path = NULL;
+ xlator_t *subvol = NULL;
+ xlator_t *this = NULL;
+ crawl_event_t *crawl_event = NULL;
+
+ this = healer->this;
+ priv = this->private;
+ shd = &priv->shd;
+ crawl_event = &healer->crawl_event;
+
+ subvol = priv->children[child];
+
+ // If this fails with ENOENT/ESTALE index is stale
+ ret = syncop_gfid_to_path(this->itable, subvol, gfid, &path);
+ if (ret < 0)
+ return ret;
+
+ ret = afr_selfheal(this, gfid);
+
+ LOCK(&priv->lock);
+ {
+ if (ret == -EIO) {
+ eh = shd->split_brain;
+ crawl_event->split_brain_count++;
+ } else if (ret < 0) {
+ crawl_event->heal_failed_count++;
+ } else if (ret == 0) {
+ crawl_event->healed_count++;
+ }
+ }
+ UNLOCK(&priv->lock);
+
+ if (eh) {
+ shd_event = GF_CALLOC(1, sizeof(*shd_event), gf_afr_mt_shd_event_t);
+ if (!shd_event)
+ goto out;
+
+ shd_event->child = child;
+ shd_event->path = path;
+
+ if (eh_save_history(eh, shd_event) < 0)
+ goto out;
+
+ shd_event = NULL;
+ path = NULL;
+ }
out:
- GF_FREE (shd_event);
- GF_FREE (path);
- return ret;
+ GF_FREE(shd_event);
+ GF_FREE(path);
+ return ret;
}
-
void
-afr_shd_sweep_prepare (struct subvol_healer *healer)
+afr_shd_sweep_prepare(struct subvol_healer *healer)
{
- crawl_event_t *event = NULL;
+ crawl_event_t *event = NULL;
- event = &healer->crawl_event;
+ event = &healer->crawl_event;
- event->healed_count = 0;
- event->split_brain_count = 0;
- event->heal_failed_count = 0;
+ event->healed_count = 0;
+ event->split_brain_count = 0;
+ event->heal_failed_count = 0;
- time (&event->start_time);
- event->end_time = 0;
+ event->start_time = gf_time();
+ event->end_time = 0;
+ _mask_cancellation();
}
-
void
-afr_shd_sweep_done (struct subvol_healer *healer)
+afr_shd_sweep_done(struct subvol_healer *healer)
{
- crawl_event_t *event = NULL;
- crawl_event_t *history = NULL;
- afr_self_heald_t *shd = NULL;
+ crawl_event_t *event = NULL;
+ crawl_event_t *history = NULL;
+ afr_self_heald_t *shd = NULL;
- event = &healer->crawl_event;
- shd = &(((afr_private_t *)healer->this->private)->shd);
+ event = &healer->crawl_event;
+ shd = &(((afr_private_t *)healer->this->private)->shd);
- time (&event->end_time);
- history = memdup (event, sizeof (*event));
- event->start_time = 0;
+ event->end_time = gf_time();
+ history = gf_memdup(event, sizeof(*event));
+ event->start_time = 0;
- if (!history)
- return;
+ if (!history)
+ return;
- if (eh_save_history (shd->statistics[healer->subvol], history) < 0)
- GF_FREE (history);
+ if (eh_save_history(shd->statistics[healer->subvol], history) < 0)
+ GF_FREE(history);
+ _unmask_cancellation();
}
int
-afr_shd_index_heal (xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
- void *data)
+afr_shd_index_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
+ void *data)
{
- struct subvol_healer *healer = data;
- afr_private_t *priv = NULL;
- uuid_t gfid = {0};
- int ret = 0;
+ struct subvol_healer *healer = data;
+ afr_private_t *priv = NULL;
+ uuid_t gfid = {0};
+ int ret = 0;
+ uint64_t val = IA_INVAL;
- priv = healer->this->private;
- if (!priv->shd.enabled)
- return -EBUSY;
+ priv = healer->this->private;
+ if (!priv->shd.enabled)
+ return -EBUSY;
- gf_log (healer->this->name, GF_LOG_DEBUG, "got entry: %s",
- entry->d_name);
+ gf_msg_debug(healer->this->name, 0, "got entry: %s from %s", entry->d_name,
+ priv->children[healer->subvol]->name);
- ret = gf_uuid_parse (entry->d_name, gfid);
- if (ret)
- return 0;
+ ret = gf_uuid_parse(entry->d_name, gfid);
+ if (ret)
+ return 0;
- ret = afr_shd_selfheal (healer, healer->subvol, gfid);
+ inode_ctx_get2(parent->inode, subvol, NULL, &val);
- if (ret == -ENOENT || ret == -ESTALE)
- afr_shd_index_purge (subvol, parent->inode, entry->d_name);
- if (ret == 2)
- /* If bricks crashed in pre-op after creating indices/xattrop
- * link but before setting afr changelogs, we end up with stale
- * xattrop links but zero changelogs. Remove such entries by
- * sending a post-op with zero changelogs.
- */
- afr_shd_zero_xattrop (healer->this, gfid);
+ ret = afr_shd_selfheal(healer, healer->subvol, gfid);
- return 0;
+ if (ret == -ENOENT || ret == -ESTALE)
+ afr_shd_entry_purge(subvol, parent->inode, entry->d_name, val);
+
+ if (ret == 2)
+ /* If bricks crashed in pre-op after creating indices/xattrop
+ * link but before setting afr changelogs, we end up with stale
+ * xattrop links but zero changelogs. Remove such entries by
+ * sending a post-op with zero changelogs.
+ */
+ afr_shd_zero_xattrop(healer->this, gfid);
+
+ return 0;
}
int
-afr_shd_index_sweep (struct subvol_healer *healer)
+afr_shd_index_sweep(struct subvol_healer *healer, char *vgfid)
{
- loc_t loc = {0};
- afr_private_t *priv = NULL;
- int ret = 0;
- xlator_t *subvol = NULL;
+ loc_t loc = {0};
+ afr_private_t *priv = NULL;
+ int ret = 0;
+ xlator_t *subvol = NULL;
+ dict_t *xdata = NULL;
+ call_frame_t *frame = NULL;
+
+ priv = healer->this->private;
+ subvol = priv->children[healer->subvol];
+
+ frame = afr_frame_create(healer->this, &ret);
+ if (!frame) {
+ ret = -ret;
+ goto out;
+ }
+
+ loc.inode = afr_shd_index_inode(healer->this, subvol, vgfid);
+ if (!loc.inode) {
+ gf_msg(healer->this->name, GF_LOG_WARNING, 0,
+ AFR_MSG_INDEX_DIR_GET_FAILED, "unable to get index-dir on %s",
+ subvol->name);
+ ret = -errno;
+ goto out;
+ }
+
+ xdata = dict_new();
+ if (!xdata || dict_set_int32_sizen(xdata, "get-gfid-type", 1)) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = syncop_mt_dir_scan(frame, subvol, &loc, GF_CLIENT_PID_SELF_HEALD,
+ healer, afr_shd_index_heal, xdata,
+ priv->shd.max_threads, priv->shd.wait_qlength);
+
+ if (ret == 0)
+ ret = healer->crawl_event.healed_count;
- priv = healer->this->private;
- subvol = priv->children[healer->subvol];
+out:
+ loc_wipe(&loc);
- loc.inode = afr_shd_index_inode (healer->this, subvol);
- if (!loc.inode) {
- gf_log (healer->this->name, GF_LOG_WARNING,
- "unable to get index-dir on %s", subvol->name);
- return -errno;
- }
+ if (xdata)
+ dict_unref(xdata);
+ if (frame)
+ AFR_STACK_DESTROY(frame);
+ return ret;
+}
- ret = syncop_dir_scan (subvol, &loc, GF_CLIENT_PID_AFR_SELF_HEALD,
- healer, afr_shd_index_heal);
+int
+afr_shd_index_sweep_all(struct subvol_healer *healer)
+{
+ int ret = 0;
+ int count = 0;
+
+ ret = afr_shd_index_sweep(healer, GF_XATTROP_INDEX_GFID);
+ if (ret < 0)
+ goto out;
+ count = ret;
+
+ ret = afr_shd_index_sweep(healer, GF_XATTROP_DIRTY_GFID);
+ if (ret < 0)
+ goto out;
+ count += ret;
+
+ ret = afr_shd_index_sweep(healer, GF_XATTROP_ENTRY_CHANGES_GFID);
+ if (ret < 0)
+ goto out;
+ count += ret;
+out:
+ if (ret < 0)
+ return ret;
+ else
+ return count;
+}
- inode_forget (loc.inode, 1);
- loc_wipe (&loc);
+int
+afr_shd_full_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
+ void *data)
+{
+ struct subvol_healer *healer = data;
+ xlator_t *this = healer->this;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+
+ if (this->cleanup_starting) {
+ return -ENOTCONN;
+ }
- if (ret == 0)
- ret = healer->crawl_event.healed_count;
+ if (!priv->shd.enabled)
+ return -EBUSY;
- return ret;
+ afr_shd_selfheal_name(healer, healer->subvol, parent->inode->gfid,
+ entry->d_name);
+
+ afr_shd_selfheal(healer, healer->subvol, entry->d_stat.ia_gfid);
+
+ return 0;
}
int
-afr_shd_full_heal (xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
- void *data)
+afr_shd_full_sweep(struct subvol_healer *healer, inode_t *inode)
{
- struct subvol_healer *healer = data;
- xlator_t *this = healer->this;
- afr_private_t *priv = NULL;
+ afr_private_t *priv = NULL;
+ loc_t loc = {0};
- priv = this->private;
- if (!priv->shd.enabled)
- return -EBUSY;
+ priv = healer->this->private;
+ loc.inode = inode;
+ return syncop_ftw(priv->children[healer->subvol], &loc,
+ GF_CLIENT_PID_SELF_HEALD, healer, afr_shd_full_heal);
+}
- afr_shd_selfheal_name (healer, healer->subvol,
- parent->inode->gfid, entry->d_name);
+int
+afr_shd_fill_ta_loc(xlator_t *this, loc_t *loc)
+{
+ afr_private_t *priv = NULL;
+ struct iatt stbuf = {
+ 0,
+ };
+ int ret = -1;
+
+ priv = this->private;
+ loc->parent = inode_ref(this->itable->root);
+ gf_uuid_copy(loc->pargfid, loc->parent->gfid);
+ loc->name = priv->pending_key[THIN_ARBITER_BRICK_INDEX];
+ loc->inode = inode_new(loc->parent->table);
+ GF_CHECK_ALLOC(loc->inode, ret, out);
+
+ if (!gf_uuid_is_null(priv->ta_gfid))
+ goto assign_gfid;
+
+ ret = syncop_lookup(priv->children[THIN_ARBITER_BRICK_INDEX], loc, &stbuf,
+ 0, 0, 0);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Failed lookup on file %s.", loc->name);
+ goto out;
+ }
+
+ gf_uuid_copy(priv->ta_gfid, stbuf.ia_gfid);
+
+assign_gfid:
+ gf_uuid_copy(loc->gfid, priv->ta_gfid);
+ ret = 0;
- afr_shd_selfheal (healer, healer->subvol, entry->d_stat.ia_gfid);
+out:
+ if (ret)
+ loc_wipe(loc);
- return 0;
+ return ret;
}
int
-afr_shd_full_sweep (struct subvol_healer *healer, inode_t *inode)
+_afr_shd_ta_get_xattrs(xlator_t *this, loc_t *loc, dict_t **xdata)
{
- afr_private_t *priv = NULL;
- loc_t loc = {0};
-
- priv = healer->this->private;
- loc.inode = inode;
- return syncop_ftw (priv->children[healer->subvol], &loc,
- GF_CLIENT_PID_AFR_SELF_HEALD, healer,
- afr_shd_full_heal);
-}
+ afr_private_t *priv = NULL;
+ dict_t *xattr = NULL;
+ int raw[AFR_NUM_CHANGE_LOGS] = {
+ 0,
+ };
+ int ret = -1;
+ int i = 0;
+
+ priv = this->private;
+
+ xattr = dict_new();
+ if (!xattr) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_DICT_GET_FAILED,
+ "Failed to create dict.");
+ goto out;
+ }
+ for (i = 0; i < priv->child_count; i++) {
+ ret = dict_set_static_bin(xattr, priv->pending_key[i], &raw,
+ AFR_NUM_CHANGE_LOGS * sizeof(int));
+ if (ret)
+ goto out;
+ }
+ ret = syncop_xattrop(priv->children[THIN_ARBITER_BRICK_INDEX], loc,
+ GF_XATTROP_ADD_ARRAY, xattr, NULL, xdata, NULL);
+ if (ret || !(*xdata)) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Xattrop failed on %s.", loc->name);
+ }
-void *
-afr_shd_index_healer (void *data)
-{
- struct subvol_healer *healer = NULL;
- xlator_t *this = NULL;
- int ret = 0;
-
- healer = data;
- THIS = this = healer->this;
-
- for (;;) {
- afr_shd_healer_wait (healer);
-
- ASSERT_LOCAL(this, healer);
-
- do {
- gf_log (this->name, GF_LOG_DEBUG,
- "starting index sweep on subvol %s",
- afr_subvol_name (this, healer->subvol));
-
- afr_shd_sweep_prepare (healer);
-
- ret = afr_shd_index_sweep (healer);
-
- afr_shd_sweep_done (healer);
- /*
- As long as at least one gfid was
- healed, keep retrying. We may have
- just healed a directory and thereby
- created entries for other gfids which
- could not be healed thus far.
- */
-
- gf_log (this->name, GF_LOG_DEBUG,
- "finished index sweep on subvol %s",
- afr_subvol_name (this, healer->subvol));
- /*
- Give a pause before retrying to avoid a busy loop
- in case the only entry in index is because of
- an ongoing I/O.
- */
- sleep (1);
- } while (ret > 0);
- }
-
- return NULL;
-}
+out:
+ if (xattr)
+ dict_unref(xattr);
+ return ret;
+}
-void *
-afr_shd_full_healer (void *data)
+void
+afr_shd_ta_get_xattrs(xlator_t *this, loc_t *loc, struct subvol_healer *healer,
+ dict_t **xdata)
{
- struct subvol_healer *healer = NULL;
- xlator_t *this = NULL;
- int run = 0;
+ int ret = 0;
+
+ loc_wipe(loc);
+ if (afr_shd_fill_ta_loc(this, loc)) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Failed to populate thin-arbiter loc for: %s.", loc->name);
+ ret = -1;
+ goto out;
+ }
+
+ ret = afr_ta_post_op_lock(this, loc);
+ if (ret)
+ goto out;
+
+ ret = _afr_shd_ta_get_xattrs(this, loc, xdata);
+ if (ret) {
+ if (*xdata) {
+ dict_unref(*xdata);
+ *xdata = NULL;
+ }
+ }
- healer = data;
- THIS = this = healer->this;
+ afr_ta_post_op_unlock(this, loc);
- for (;;) {
- pthread_mutex_lock (&healer->mutex);
- {
- run = __afr_shd_healer_wait (healer);
- if (!run)
- healer->running = _gf_false;
- }
- pthread_mutex_unlock (&healer->mutex);
+out:
+ if (ret)
+ healer->rerun = 1;
+}
- if (!run)
- break;
+int
+afr_shd_ta_unset_xattrs(xlator_t *this, loc_t *loc, dict_t **xdata, int healer)
+{
+ afr_private_t *priv = NULL;
+ dict_t *xattr = NULL;
+ gf_boolean_t need_xattrop = _gf_false;
+ void *pending_raw = NULL;
+ int *raw = NULL;
+ int pending[AFR_NUM_CHANGE_LOGS] = {
+ 0,
+ };
+ int i = 0;
+ int j = 0;
+ int val = 0;
+ int ret = -1;
+
+ priv = this->private;
+
+ xattr = dict_new();
+ if (!xattr) {
+ goto out;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ raw = GF_CALLOC(AFR_NUM_CHANGE_LOGS, sizeof(int), gf_afr_mt_int32_t);
+ if (!raw) {
+ goto out;
+ }
- ASSERT_LOCAL(this, healer);
+ ret = dict_get_ptr(*xdata, priv->pending_key[i], &pending_raw);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_GET_FAILED,
+ "Error getting value "
+ "of pending key %s",
+ priv->pending_key[i]);
+ GF_FREE(raw);
+ goto out;
+ }
- gf_log (this->name, GF_LOG_INFO,
- "starting full sweep on subvol %s",
- afr_subvol_name (this, healer->subvol));
+ memcpy(pending, pending_raw, sizeof(pending));
+ for (j = 0; j < AFR_NUM_CHANGE_LOGS; j++) {
+ val = ntoh32(pending[j]);
+ if (val) {
+ if (i == healer) {
+ gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_THIN_ARB,
+ "I am "
+ "not the good shd. Skipping. "
+ "SHD = %d.",
+ healer);
+ ret = 0;
+ GF_FREE(raw);
+ goto out;
+ }
+ need_xattrop = _gf_true;
+ raw[j] = hton32(-val);
+ }
+ }
- afr_shd_sweep_prepare (healer);
+ ret = dict_set_bin(xattr, priv->pending_key[i], raw,
+ AFR_NUM_CHANGE_LOGS * sizeof(int));
+ if (ret) {
+ GF_FREE(raw);
+ goto out;
+ }
- afr_shd_full_sweep (healer, this->itable->root);
+ if (need_xattrop)
+ break;
+ }
- afr_shd_sweep_done (healer);
+ if (!need_xattrop) {
+ ret = 0;
+ goto out;
+ }
- gf_log (this->name, GF_LOG_INFO,
- "finished full sweep on subvol %s",
- afr_subvol_name (this, healer->subvol));
- }
+ ret = syncop_xattrop(priv->children[THIN_ARBITER_BRICK_INDEX], loc,
+ GF_XATTROP_ADD_ARRAY, xattr, NULL, NULL, NULL);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Xattrop failed.");
- return NULL;
-}
+out:
+ if (xattr)
+ dict_unref(xattr);
+ return ret;
+}
-int
-afr_shd_healer_init (xlator_t *this, struct subvol_healer *healer)
+void
+afr_shd_ta_check_and_unset_xattrs(xlator_t *this, loc_t *loc,
+ struct subvol_healer *healer,
+ dict_t *pre_crawl_xdata)
{
- int ret = 0;
+ int ret_lock = 0;
+ int ret = 0;
+ dict_t *post_crawl_xdata = NULL;
- ret = pthread_mutex_init (&healer->mutex, NULL);
- if (ret)
- goto out;
+ ret_lock = afr_ta_post_op_lock(this, loc);
+ if (ret_lock)
+ goto unref;
- ret = pthread_cond_init (&healer->cond, NULL);
- if (ret)
- goto out;
-
- healer->this = this;
- healer->running = _gf_false;
- healer->rerun = _gf_false;
- healer->local = _gf_false;
-out:
- return ret;
-}
+ ret = _afr_shd_ta_get_xattrs(this, loc, &post_crawl_xdata);
+ if (ret)
+ goto unref;
+ if (!are_dicts_equal(pre_crawl_xdata, post_crawl_xdata, NULL, NULL)) {
+ ret = -1;
+ goto unref;
+ }
-int
-afr_shd_healer_spawn (xlator_t *this, struct subvol_healer *healer,
- void *(threadfn)(void *))
-{
- int ret = 0;
-
- pthread_mutex_lock (&healer->mutex);
- {
- if (healer->running) {
- pthread_cond_signal (&healer->cond);
- } else {
- ret = gf_thread_create (&healer->thread, NULL,
- threadfn, healer);
- if (ret)
- goto unlock;
- healer->running = 1;
- }
-
- healer->rerun = 1;
- }
-unlock:
- pthread_mutex_unlock (&healer->mutex);
+ ret = afr_shd_ta_unset_xattrs(this, loc, &post_crawl_xdata, healer->subvol);
- return ret;
-}
+unref:
+ if (post_crawl_xdata) {
+ dict_unref(post_crawl_xdata);
+ post_crawl_xdata = NULL;
+ }
+ if (ret || ret_lock)
+ healer->rerun = 1;
-int
-afr_shd_full_healer_spawn (xlator_t *this, int subvol)
-{
- return afr_shd_healer_spawn (this, NTH_FULL_HEALER (this, subvol),
- afr_shd_full_healer);
+ if (!ret_lock)
+ afr_ta_post_op_unlock(this, loc);
}
-
-int
-afr_shd_index_healer_spawn (xlator_t *this, int subvol)
+gf_boolean_t
+afr_bricks_available_for_heal(afr_private_t *priv)
{
- return afr_shd_healer_spawn (this, NTH_INDEX_HEALER (this, subvol),
- afr_shd_index_healer);
-}
+ int up_children = 0;
+ up_children = __afr_get_up_children_count(priv);
+ if (up_children < 2) {
+ return _gf_false;
+ }
+ return _gf_true;
+}
-int
-afr_shd_dict_add_crawl_event (xlator_t *this, dict_t *output,
- crawl_event_t *crawl_event)
+static gf_boolean_t
+afr_shd_ta_needs_heal(xlator_t *this, struct subvol_healer *healer)
{
- int ret = 0;
- uint64_t count = 0;
- char key[256] = {0};
- int xl_id = 0;
- uint64_t healed_count = 0;
- uint64_t split_brain_count = 0;
- uint64_t heal_failed_count = 0;
- char *start_time_str = 0;
- char *end_time_str = NULL;
- char *crawl_type = NULL;
- int progress = -1;
- int child = -1;
-
- child = crawl_event->child;
- healed_count = crawl_event->healed_count;
- split_brain_count = crawl_event->split_brain_count;
- heal_failed_count = crawl_event->heal_failed_count;
- crawl_type = crawl_event->crawl_type;
-
- if (!crawl_event->start_time)
- goto out;
-
- start_time_str = gf_strdup (ctime (&crawl_event->start_time));
-
- if (crawl_event->end_time)
- end_time_str = gf_strdup (ctime (&crawl_event->end_time));
-
- ret = dict_get_int32 (output, this->name, &xl_id);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "xl does not have id");
- goto out;
+ dict_t *xdata = NULL;
+ afr_private_t *priv = NULL;
+ loc_t loc = {
+ 0,
+ };
+ int ret = -1;
+ int i = 0;
+ gf_boolean_t need_heal = _gf_false;
+
+ priv = this->private;
+
+ ret = afr_shd_fill_ta_loc(this, &loc);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Failed to populate thin-arbiter loc for: %s.", loc.name);
+ healer->rerun = 1;
+ goto out;
+ }
+
+ if (_afr_shd_ta_get_xattrs(this, &loc, &xdata)) {
+ healer->rerun = 1;
+ goto out;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (afr_ta_dict_contains_pending_xattr(xdata, priv, i)) {
+ need_heal = _gf_true;
+ break;
}
+ }
- snprintf (key, sizeof (key), "statistics-%d-%d-count", xl_id, child);
- ret = dict_get_uint64 (output, key, &count);
+out:
+ if (xdata)
+ dict_unref(xdata);
+ loc_wipe(&loc);
+ return need_heal;
+}
- snprintf (key, sizeof (key), "statistics_healed_cnt-%d-%d-%"PRIu64,
- xl_id, child, count);
- ret = dict_set_uint64(output, key, healed_count);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "Could not add statistics_healed_count to outout");
- goto out;
- }
-
- snprintf (key, sizeof (key), "statistics_sb_cnt-%d-%d-%"PRIu64,
- xl_id, child, count);
- ret = dict_set_uint64 (output, key, split_brain_count);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "Could not add statistics_split_brain_count to outout");
- goto out;
+static int
+afr_shd_anon_inode_cleaner(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
+ void *data)
+{
+ struct subvol_healer *healer = data;
+ afr_private_t *priv = healer->this->private;
+ call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
+ int ret = 0;
+ loc_t loc = {0};
+ int count = 0;
+ int i = 0;
+ int op_errno = 0;
+ struct iatt *iatt = NULL;
+ gf_boolean_t multiple_links = _gf_false;
+ unsigned char *gfid_present = alloca0(priv->child_count);
+ unsigned char *entry_present = alloca0(priv->child_count);
+ char *type = "file";
+
+ frame = afr_frame_create(healer->this, &ret);
+ if (!frame) {
+ ret = -ret;
+ goto out;
+ }
+ local = frame->local;
+ if (AFR_COUNT(local->child_up, priv->child_count) != priv->child_count) {
+ gf_msg_debug(healer->this->name, 0,
+ "Not all bricks are up. Skipping "
+ "cleanup of %s on %s",
+ entry->d_name, subvol->name);
+ ret = 0;
+ goto out;
+ }
+
+ loc.inode = inode_new(parent->inode->table);
+ if (!loc.inode) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ ret = gf_uuid_parse(entry->d_name, loc.gfid);
+ if (ret) {
+ ret = 0;
+ goto out;
+ }
+ AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup, &loc,
+ NULL);
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].op_ret == 0) {
+ count++;
+ gfid_present[i] = 1;
+ iatt = &local->replies[i].poststat;
+ if (iatt->ia_type == IA_IFDIR) {
+ type = "dir";
+ }
+
+ if (i == healer->subvol) {
+ if (local->replies[i].poststat.ia_nlink > 1) {
+ multiple_links = _gf_true;
+ }
+ }
+ } else if (local->replies[i].op_errno != ENOENT &&
+ local->replies[i].op_errno != ESTALE) {
+ /*We don't have complete view. Skip the entry*/
+ gf_msg_debug(healer->this->name, local->replies[i].op_errno,
+ "Skipping cleanup of %s on %s", entry->d_name,
+ subvol->name);
+ ret = 0;
+ goto out;
}
-
- snprintf (key, sizeof (key), "statistics_crawl_type-%d-%d-%"PRIu64,
- xl_id, child, count);
- ret = dict_set_str (output, key, crawl_type);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "Could not add statistics_crawl_type to output");
- goto out;
+ }
+
+ /*Inode is deleted from subvol*/
+ if (count == 1 || (iatt->ia_type != IA_IFDIR && multiple_links)) {
+ gf_msg(healer->this->name, GF_LOG_WARNING, 0,
+ AFR_MSG_EXPUNGING_FILE_OR_DIR, "expunging %s %s/%s on %s", type,
+ priv->anon_inode_name, entry->d_name, subvol->name);
+ ret = afr_shd_entry_purge(subvol, parent->inode, entry->d_name,
+ iatt->ia_type);
+ if (ret == -ENOENT || ret == -ESTALE)
+ ret = 0;
+ } else if (count > 1) {
+ loc_wipe(&loc);
+ loc.parent = inode_ref(parent->inode);
+ loc.name = entry->d_name;
+ loc.inode = inode_new(parent->inode->table);
+ if (!loc.inode) {
+ ret = -ENOMEM;
+ goto out;
}
-
- snprintf (key, sizeof (key), "statistics_heal_failed_cnt-%d-%d-%"PRIu64,
- xl_id, child, count);
- ret = dict_set_uint64 (output, key, heal_failed_count);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "Could not add statistics_healed_failed_count to outout");
+ AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup,
+ &loc, NULL);
+ count = 0;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].op_ret == 0) {
+ count++;
+ entry_present[i] = 1;
+ iatt = &local->replies[i].poststat;
+ } else if (local->replies[i].op_errno != ENOENT &&
+ local->replies[i].op_errno != ESTALE) {
+ /*We don't have complete view. Skip the entry*/
+ gf_msg_debug(healer->this->name, local->replies[i].op_errno,
+ "Skipping cleanup of %s on %s", entry->d_name,
+ subvol->name);
+ ret = 0;
goto out;
+ }
}
-
- snprintf (key, sizeof (key), "statistics_strt_time-%d-%d-%"PRIu64,
- xl_id, child, count);
- ret = dict_set_dynstr (output, key, start_time_str);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "Could not add statistics_crawl_start_time to outout");
- goto out;
- } else {
- start_time_str = NULL;
- }
-
- if (!end_time_str)
- progress = 1;
- else
- progress = 0;
-
- snprintf (key, sizeof (key), "statistics_end_time-%d-%d-%"PRIu64,
- xl_id, child, count);
- if (!end_time_str)
- end_time_str = gf_strdup ("Could not determine the end time");
- ret = dict_set_dynstr (output, key, end_time_str);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "Could not add statistics_crawl_end_time to outout");
+ for (i = 0; i < priv->child_count; i++) {
+ if (gfid_present[i] && !entry_present[i]) {
+ /*Entry is not anonymous on at least one subvol*/
+ gf_msg_debug(healer->this->name, 0,
+ "Valid entry present on %s "
+ "Skipping cleanup of %s on %s",
+ priv->children[i]->name, entry->d_name,
+ subvol->name);
+ ret = 0;
goto out;
- } else {
- end_time_str = NULL;
- }
-
- snprintf (key, sizeof (key), "statistics_inprogress-%d-%d-%"PRIu64,
- xl_id, child, count);
+ }
+ }
- ret = dict_set_int32 (output, key, progress);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "Could not add statistics_inprogress to outout");
- goto out;
+ gf_msg(healer->this->name, GF_LOG_WARNING, 0,
+ AFR_MSG_EXPUNGING_FILE_OR_DIR,
+ "expunging %s %s/%s on all subvols", type, priv->anon_inode_name,
+ entry->d_name);
+ ret = 0;
+ for (i = 0; i < priv->child_count; i++) {
+ op_errno = -afr_shd_entry_purge(priv->children[i], loc.parent,
+ entry->d_name, iatt->ia_type);
+ if (op_errno != ENOENT && op_errno != ESTALE) {
+ ret |= -op_errno;
+ }
}
+ }
- snprintf (key, sizeof (key), "statistics-%d-%d-count", xl_id, child);
- ret = dict_set_uint64 (output, key, count + 1);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "Could not increment the counter.");
- goto out;
- }
out:
- GF_FREE (start_time_str);
- GF_FREE (end_time_str);
- return ret;
+ if (frame)
+ AFR_STACK_DESTROY(frame);
+ loc_wipe(&loc);
+ return ret;
}
-
-int
-afr_shd_dict_add_path (xlator_t *this, dict_t *output, int child, char *path,
- struct timeval *tv)
+static void
+afr_cleanup_anon_inode_dir(struct subvol_healer *healer)
{
- int ret = -1;
- uint64_t count = 0;
- char key[256] = {0};
- int xl_id = 0;
+ int ret = 0;
+ call_frame_t *frame = NULL;
+ afr_private_t *priv = healer->this->private;
+ loc_t loc = {0};
+
+ ret = afr_anon_inode_create(healer->this, healer->subvol, &loc.inode);
+ if (ret)
+ goto out;
+
+ frame = afr_frame_create(healer->this, &ret);
+ if (!frame) {
+ ret = -ret;
+ goto out;
+ }
+
+ ret = syncop_mt_dir_scan(frame, priv->children[healer->subvol], &loc,
+ GF_CLIENT_PID_SELF_HEALD, healer,
+ afr_shd_anon_inode_cleaner, NULL,
+ priv->shd.max_threads, priv->shd.wait_qlength);
+out:
+ if (frame)
+ AFR_STACK_DESTROY(frame);
+ loc_wipe(&loc);
+ return;
+}
- ret = dict_get_int32 (output, this->name, &xl_id);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "xl does not have id");
- goto out;
+void *
+afr_shd_index_healer(void *data)
+{
+ struct subvol_healer *healer = NULL;
+ xlator_t *this = NULL;
+ int ret = 0;
+ afr_private_t *priv = NULL;
+ dict_t *pre_crawl_xdata = NULL;
+ loc_t loc = {
+ 0,
+ };
+
+ healer = data;
+ THIS = this = healer->this;
+ priv = this->private;
+
+ for (;;) {
+ afr_shd_healer_wait(healer);
+
+ if (!afr_bricks_available_for_heal(priv))
+ continue;
+
+ ASSERT_LOCAL(this, healer);
+ priv->local[healer->subvol] = healer->local;
+
+ if (priv->thin_arbiter_count) {
+ if (afr_shd_ta_needs_heal(this, healer))
+ afr_shd_ta_get_xattrs(this, &loc, healer, &pre_crawl_xdata);
}
- snprintf (key, sizeof (key), "%d-%d-count", xl_id, child);
- ret = dict_get_uint64 (output, key, &count);
-
- snprintf (key, sizeof (key), "%d-%d-%"PRIu64, xl_id, child, count);
- ret = dict_set_dynstr (output, key, path);
-
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "%s: Could not add to output",
- path);
- goto out;
+ do {
+ gf_msg_debug(this->name, 0, "starting index sweep on subvol %s",
+ afr_subvol_name(this, healer->subvol));
+
+ afr_shd_sweep_prepare(healer);
+
+ ret = afr_shd_index_sweep_all(healer);
+
+ afr_shd_sweep_done(healer);
+ /*
+ As long as at least one gfid was
+ healed, keep retrying. We may have
+ just healed a directory and thereby
+ created entries for other gfids which
+ could not be healed thus far.
+ */
+
+ gf_msg_debug(this->name, 0, "finished index sweep on subvol %s",
+ afr_subvol_name(this, healer->subvol));
+ /*
+ Give a pause before retrying to avoid a busy loop
+ in case the only entry in index is because of
+ an ongoing I/O.
+ */
+ sleep(1);
+ } while (ret > 0);
+
+ if (ret == 0) {
+ afr_cleanup_anon_inode_dir(healer);
}
- if (tv) {
- snprintf (key, sizeof (key), "%d-%d-%"PRIu64"-time", xl_id,
- child, count);
- ret = dict_set_uint32 (output, key, tv->tv_sec);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "%s: Could not set time",
- path);
- goto out;
- }
- }
-
- snprintf (key, sizeof (key), "%d-%d-count", xl_id, child);
+ if (ret == 0 && pre_crawl_xdata &&
+ !healer->crawl_event.heal_failed_count) {
+ afr_shd_ta_check_and_unset_xattrs(this, &loc, healer,
+ pre_crawl_xdata);
+ }
- ret = dict_set_uint64 (output, key, count + 1);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Could not increment count");
- goto out;
+ if (pre_crawl_xdata) {
+ dict_unref(pre_crawl_xdata);
+ pre_crawl_xdata = NULL;
}
+ }
- ret = 0;
-out:
- return ret;
+ return NULL;
}
-int
-afr_shd_gather_entry (xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
- void *data)
+void *
+afr_shd_full_healer(void *data)
{
- dict_t *output = data;
- xlator_t *this = NULL;
- afr_private_t *priv = NULL;
- char *path = NULL;
- int ret = 0;
- int child = 0;
- uuid_t gfid = {0};
+ struct subvol_healer *healer = NULL;
+ xlator_t *this = NULL;
+ int run = 0;
+
+ healer = data;
+ THIS = this = healer->this;
+
+ for (;;) {
+ pthread_mutex_lock(&healer->mutex);
+ {
+ run = __afr_shd_healer_wait(healer);
+ if (!run)
+ healer->running = _gf_false;
+ }
+ pthread_mutex_unlock(&healer->mutex);
- this = THIS;
- priv = this->private;
+ if (!run)
+ break;
- gf_log (this->name, GF_LOG_DEBUG, "got entry: %s",
- entry->d_name);
+ ASSERT_LOCAL(this, healer);
- ret = gf_uuid_parse (entry->d_name, gfid);
- if (ret)
- return 0;
+ gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_SELF_HEAL_INFO,
+ "starting full sweep on subvol %s",
+ afr_subvol_name(this, healer->subvol));
- for (child = 0; child < priv->child_count; child++)
- if (priv->children[child] == subvol)
- break;
+ afr_shd_sweep_prepare(healer);
- if (child == priv->child_count)
- return 0;
+ afr_shd_full_sweep(healer, this->itable->root);
- ret = syncop_gfid_to_path (this->itable, subvol, gfid, &path);
+ afr_shd_sweep_done(healer);
- if (ret == -ENOENT || ret == -ESTALE) {
- afr_shd_index_purge (subvol, parent->inode, entry->d_name);
- } else if (ret == 0) {
- ret = afr_shd_dict_add_path (this, output, child, path, NULL);
- }
+ gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_SELF_HEAL_INFO,
+ "finished full sweep on subvol %s",
+ afr_subvol_name(this, healer->subvol));
+ }
- return 0;
+ return NULL;
}
int
-afr_shd_gather_index_entries (xlator_t *this, int child, dict_t *output)
+afr_shd_healer_init(xlator_t *this, struct subvol_healer *healer)
{
- loc_t loc = {0};
- afr_private_t *priv = NULL;
- xlator_t *subvol = NULL;
- int ret = 0;
+ int ret = 0;
- priv = this->private;
- subvol = priv->children[child];
+ ret = pthread_mutex_init(&healer->mutex, NULL);
+ if (ret)
+ goto out;
- loc.inode = afr_shd_index_inode (this, subvol);
- if (!loc.inode) {
- gf_log (this->name, GF_LOG_WARNING,
- "unable to get index-dir on %s", subvol->name);
- return -errno;
+ ret = pthread_cond_init(&healer->cond, NULL);
+ if (ret)
+ goto out;
+
+ healer->this = this;
+ healer->running = _gf_false;
+ healer->rerun = _gf_false;
+ healer->local = _gf_false;
+out:
+ return ret;
+}
+
+int
+afr_shd_healer_spawn(xlator_t *this, struct subvol_healer *healer,
+ void *(threadfn)(void *))
+{
+ int ret = 0;
+
+ pthread_mutex_lock(&healer->mutex);
+ {
+ if (healer->running) {
+ pthread_cond_signal(&healer->cond);
+ } else {
+ ret = gf_thread_create(&healer->thread, NULL, threadfn, healer,
+ "shdheal");
+ if (ret)
+ goto unlock;
+ healer->running = 1;
}
- ret = syncop_dir_scan (subvol, &loc, GF_CLIENT_PID_AFR_SELF_HEALD,
- output, afr_shd_gather_entry);
- inode_forget (loc.inode, 1);
- loc_wipe (&loc);
- return ret;
+ healer->rerun = 1;
+ }
+unlock:
+ pthread_mutex_unlock(&healer->mutex);
+
+ return ret;
}
+int
+afr_shd_full_healer_spawn(xlator_t *this, int subvol)
+{
+ return afr_shd_healer_spawn(this, NTH_FULL_HEALER(this, subvol),
+ afr_shd_full_healer);
+}
int
-afr_add_shd_event (circular_buffer_t *cb, void *data)
+afr_shd_index_healer_spawn(xlator_t *this, int subvol)
{
- dict_t *output = NULL;
- xlator_t *this = THIS;
- afr_private_t *priv = NULL;
- afr_self_heald_t *shd = NULL;
- shd_event_t *shd_event = NULL;
- char *path = NULL;
-
- output = data;
- priv = this->private;
- shd = &priv->shd;
- shd_event = cb->data;
-
- if (!shd->index_healers[shd_event->child].local)
- return 0;
-
- path = gf_strdup (shd_event->path);
- if (!path)
- return -ENOMEM;
-
- afr_shd_dict_add_path (this, output, shd_event->child, path,
- &cb->tv);
- return 0;
+ return afr_shd_healer_spawn(this, NTH_INDEX_HEALER(this, subvol),
+ afr_shd_index_healer);
}
int
-afr_add_crawl_event (circular_buffer_t *cb, void *data)
+afr_shd_dict_add_crawl_event(xlator_t *this, dict_t *output,
+ crawl_event_t *crawl_event)
{
- dict_t *output = NULL;
- xlator_t *this = THIS;
- afr_private_t *priv = NULL;
- afr_self_heald_t *shd = NULL;
- crawl_event_t *crawl_event = NULL;
+ int ret = 0;
+ uint64_t count = 0;
+ char key[128] = {0};
+ int keylen = 0;
+ char suffix[64] = {0};
+ int xl_id = 0;
+ uint64_t healed_count = 0;
+ uint64_t split_brain_count = 0;
+ uint64_t heal_failed_count = 0;
+ char *start_time_str = 0;
+ char *end_time_str = NULL;
+ char *crawl_type = NULL;
+ int progress = -1;
+ int child = -1;
+
+ child = crawl_event->child;
+ healed_count = crawl_event->healed_count;
+ split_brain_count = crawl_event->split_brain_count;
+ heal_failed_count = crawl_event->heal_failed_count;
+ crawl_type = crawl_event->crawl_type;
+
+ if (!crawl_event->start_time)
+ goto out;
+
+ start_time_str = gf_strdup(ctime(&crawl_event->start_time));
+
+ if (crawl_event->end_time)
+ end_time_str = gf_strdup(ctime(&crawl_event->end_time));
+
+ ret = dict_get_int32(output, this->name, &xl_id);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_GET_FAILED,
+ "xl does not have id");
+ goto out;
+ }
+
+ snprintf(key, sizeof(key), "statistics-%d-%d-count", xl_id, child);
+ ret = dict_get_uint64(output, key, &count);
+
+ snprintf(suffix, sizeof(suffix), "%d-%d-%" PRIu64, xl_id, child, count);
+ snprintf(key, sizeof(key), "statistics_healed_cnt-%s", suffix);
+ ret = dict_set_uint64(output, key, healed_count);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Could not add statistics_healed_count to output");
+ goto out;
+ }
+
+ snprintf(key, sizeof(key), "statistics_sb_cnt-%s", suffix);
+ ret = dict_set_uint64(output, key, split_brain_count);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Could not add statistics_split_brain_count to output");
+ goto out;
+ }
+
+ keylen = snprintf(key, sizeof(key), "statistics_crawl_type-%s", suffix);
+ ret = dict_set_strn(output, key, keylen, crawl_type);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Could not add statistics_crawl_type to output");
+ goto out;
+ }
+
+ snprintf(key, sizeof(key), "statistics_heal_failed_cnt-%s", suffix);
+ ret = dict_set_uint64(output, key, heal_failed_count);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Could not add statistics_healed_failed_count to output");
+ goto out;
+ }
+
+ keylen = snprintf(key, sizeof(key), "statistics_strt_time-%s", suffix);
+ ret = dict_set_dynstrn(output, key, keylen, start_time_str);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Could not add statistics_crawl_start_time to output");
+ goto out;
+ } else {
+ start_time_str = NULL;
+ }
+
+ if (!end_time_str)
+ progress = 1;
+ else
+ progress = 0;
+
+ keylen = snprintf(key, sizeof(key), "statistics_end_time-%s", suffix);
+ if (!end_time_str)
+ end_time_str = gf_strdup("Could not determine the end time");
+ ret = dict_set_dynstrn(output, key, keylen, end_time_str);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Could not add statistics_crawl_end_time to output");
+ goto out;
+ } else {
+ end_time_str = NULL;
+ }
+
+ keylen = snprintf(key, sizeof(key), "statistics_inprogress-%s", suffix);
+
+ ret = dict_set_int32n(output, key, keylen, progress);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Could not add statistics_inprogress to output");
+ goto out;
+ }
+
+ snprintf(key, sizeof(key), "statistics-%d-%d-count", xl_id, child);
+ ret = dict_set_uint64(output, key, count + 1);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Could not increment the counter.");
+ goto out;
+ }
+out:
+ GF_FREE(start_time_str);
+ GF_FREE(end_time_str);
+ return ret;
+}
- output = data;
- priv = this->private;
- shd = &priv->shd;
- crawl_event = cb->data;
+int
+afr_shd_dict_add_path(xlator_t *this, dict_t *output, int child, char *path,
+ struct timeval *tv)
+{
+ int ret = -1;
+ uint64_t count = 0;
+ char key[64] = {0};
+ int keylen = 0;
+ char xl_id_child_str[32] = {0};
+ int xl_id = 0;
+
+ ret = dict_get_int32(output, this->name, &xl_id);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_GET_FAILED,
+ "xl does not have id");
+ goto out;
+ }
+
+ snprintf(xl_id_child_str, sizeof(xl_id_child_str), "%d-%d", xl_id, child);
+ snprintf(key, sizeof(key), "%s-count", xl_id_child_str);
+ ret = dict_get_uint64(output, key, &count);
+
+ keylen = snprintf(key, sizeof(key), "%s-%" PRIu64, xl_id_child_str, count);
+ ret = dict_set_dynstrn(output, key, keylen, path);
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
+ "%s: Could not add to output", path);
+ goto out;
+ }
+
+ if (tv) {
+ snprintf(key, sizeof(key), "%s-%" PRIu64 "-time", xl_id_child_str,
+ count);
+ ret = dict_set_uint32(output, key, tv->tv_sec);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
+ "%s: Could not set time", path);
+ goto out;
+ }
+ }
- if (!shd->index_healers[crawl_event->child].local)
- return 0;
+ snprintf(key, sizeof(key), "%s-count", xl_id_child_str);
- afr_shd_dict_add_crawl_event (this, output, crawl_event);
+ ret = dict_set_uint64(output, key, count + 1);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Could not increment count");
+ goto out;
+ }
- return 0;
+ ret = 0;
+out:
+ return ret;
}
-
int
-afr_selfheal_daemon_init (xlator_t *this)
+afr_add_shd_event(circular_buffer_t *cb, void *data)
{
- afr_private_t *priv = NULL;
- afr_self_heald_t *shd = NULL;
- int ret = -1;
- int i = 0;
-
- priv = this->private;
- shd = &priv->shd;
-
- this->itable = inode_table_new (SHD_INODE_LRU_LIMIT, this);
- if (!this->itable)
- goto out;
-
- shd->index_healers = GF_CALLOC (sizeof(*shd->index_healers),
- priv->child_count,
- gf_afr_mt_subvol_healer_t);
- if (!shd->index_healers)
- goto out;
-
- for (i = 0; i < priv->child_count; i++) {
- shd->index_healers[i].subvol = i;
- ret = afr_shd_healer_init (this, &shd->index_healers[i]);
- if (ret)
- goto out;
- }
-
- shd->full_healers = GF_CALLOC (sizeof(*shd->full_healers),
- priv->child_count,
- gf_afr_mt_subvol_healer_t);
- if (!shd->full_healers)
- goto out;
- for (i = 0; i < priv->child_count; i++) {
- shd->full_healers[i].subvol = i;
- ret = afr_shd_healer_init (this, &shd->full_healers[i]);
- if (ret)
- goto out;
- }
-
- shd->split_brain = eh_new (AFR_EH_SPLIT_BRAIN_LIMIT, _gf_false,
- afr_destroy_shd_event_data);
- if (!shd->split_brain)
- goto out;
-
- shd->statistics = GF_CALLOC (sizeof(eh_t *), priv->child_count,
- gf_common_mt_eh_t);
- if (!shd->statistics)
- goto out;
+ dict_t *output = NULL;
+ xlator_t *this = THIS;
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ shd_event_t *shd_event = NULL;
+ char *path = NULL;
+
+ output = data;
+ priv = this->private;
+ shd = &priv->shd;
+ shd_event = cb->data;
+
+ if (!shd->index_healers[shd_event->child].local)
+ return 0;
- for (i = 0; i < priv->child_count ; i++) {
- shd->statistics[i] = eh_new (AFR_STATISTICS_HISTORY_SIZE,
- _gf_false,
- afr_destroy_crawl_event_data);
- if (!shd->statistics[i])
- goto out;
- shd->full_healers[i].crawl_event.child = i;
- shd->full_healers[i].crawl_event.crawl_type = "FULL";
- shd->index_healers[i].crawl_event.child = i;
- shd->index_healers[i].crawl_event.crawl_type = "INDEX";
- }
+ path = gf_strdup(shd_event->path);
+ if (!path)
+ return -ENOMEM;
- ret = 0;
-out:
- return ret;
+ afr_shd_dict_add_path(this, output, shd_event->child, path, &cb->tv);
+ return 0;
}
-
int
-afr_selfheal_childup (xlator_t *this, int subvol)
+afr_add_crawl_event(circular_buffer_t *cb, void *data)
{
- afr_shd_index_healer_spawn (this, subvol);
+ dict_t *output = NULL;
+ xlator_t *this = THIS;
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ crawl_event_t *crawl_event = NULL;
+
+ output = data;
+ priv = this->private;
+ shd = &priv->shd;
+ crawl_event = cb->data;
+
+ if (!shd->index_healers[crawl_event->child].local)
+ return 0;
+
+ afr_shd_dict_add_crawl_event(this, output, crawl_event);
- return 0;
+ return 0;
}
+int
+afr_selfheal_daemon_init(xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ int ret = -1;
+ int i = 0;
+
+ priv = this->private;
+ shd = &priv->shd;
+
+ shd->index_healers = GF_CALLOC(sizeof(*shd->index_healers),
+ priv->child_count,
+ gf_afr_mt_subvol_healer_t);
+ if (!shd->index_healers)
+ goto out;
+
+ for (i = 0; i < priv->child_count; i++) {
+ shd->index_healers[i].subvol = i;
+ ret = afr_shd_healer_init(this, &shd->index_healers[i]);
+ if (ret)
+ goto out;
+ }
+
+ shd->full_healers = GF_CALLOC(sizeof(*shd->full_healers), priv->child_count,
+ gf_afr_mt_subvol_healer_t);
+ if (!shd->full_healers)
+ goto out;
+ for (i = 0; i < priv->child_count; i++) {
+ shd->full_healers[i].subvol = i;
+ ret = afr_shd_healer_init(this, &shd->full_healers[i]);
+ if (ret)
+ goto out;
+ }
+
+ shd->split_brain = eh_new(AFR_EH_SPLIT_BRAIN_LIMIT, _gf_false,
+ afr_destroy_shd_event_data);
+ if (!shd->split_brain)
+ goto out;
+
+ shd->statistics = GF_CALLOC(sizeof(eh_t *), priv->child_count,
+ gf_common_mt_eh_t);
+ if (!shd->statistics)
+ goto out;
+
+ for (i = 0; i < priv->child_count; i++) {
+ shd->statistics[i] = eh_new(AFR_STATISTICS_HISTORY_SIZE, _gf_false,
+ afr_destroy_crawl_event_data);
+ if (!shd->statistics[i])
+ goto out;
+ shd->full_healers[i].crawl_event.child = i;
+ shd->full_healers[i].crawl_event.crawl_type = "FULL";
+ shd->index_healers[i].crawl_event.child = i;
+ shd->index_healers[i].crawl_event.crawl_type = "INDEX";
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+void
+afr_selfheal_childup(xlator_t *this, afr_private_t *priv)
+{
+ int subvol = 0;
+
+ if (!priv->shd.iamshd)
+ return;
+ for (subvol = 0; subvol < priv->child_count; subvol++)
+ if (priv->child_up[subvol])
+ afr_shd_index_healer_spawn(this, subvol);
+
+ return;
+}
int
-afr_shd_get_index_count (xlator_t *this, int i, uint64_t *count)
+afr_shd_get_index_count(xlator_t *this, int i, uint64_t *count)
{
- afr_private_t *priv = NULL;
- xlator_t *subvol = NULL;
- loc_t rootloc = {0, };
- dict_t *xattr = NULL;
- int ret = -1;
+ afr_private_t *priv = NULL;
+ xlator_t *subvol = NULL;
+ loc_t rootloc = {
+ 0,
+ };
+ dict_t *xattr = NULL;
+ int ret = -1;
- priv = this->private;
- subvol = priv->children[i];
+ priv = this->private;
+ subvol = priv->children[i];
- rootloc.inode = inode_ref (this->itable->root);
- gf_uuid_copy (rootloc.gfid, rootloc.inode->gfid);
+ rootloc.inode = inode_ref(this->itable->root);
+ gf_uuid_copy(rootloc.gfid, rootloc.inode->gfid);
- ret = syncop_getxattr (subvol, &rootloc, &xattr,
- GF_XATTROP_INDEX_COUNT, NULL, NULL);
- if (ret < 0)
- goto out;
+ ret = syncop_getxattr(subvol, &rootloc, &xattr, GF_XATTROP_INDEX_COUNT,
+ NULL, NULL);
+ if (ret < 0)
+ goto out;
- ret = dict_get_uint64 (xattr, GF_XATTROP_INDEX_COUNT, count);
- if (ret)
- goto out;
+ ret = dict_get_uint64(xattr, GF_XATTROP_INDEX_COUNT, count);
+ if (ret)
+ goto out;
- ret = 0;
+ ret = 0;
out:
- if (xattr)
- dict_unref (xattr);
- loc_wipe (&rootloc);
+ if (xattr)
+ dict_unref(xattr);
+ loc_wipe(&rootloc);
- return ret;
+ return ret;
}
-
int
-afr_xl_op (xlator_t *this, dict_t *input, dict_t *output)
+afr_xl_op(xlator_t *this, dict_t *input, dict_t *output)
{
- gf_xl_afr_op_t op = GF_SHD_OP_INVALID;
- int ret = 0;
- int xl_id = 0;
- afr_private_t *priv = NULL;
- afr_self_heald_t *shd = NULL;
- struct subvol_healer *healer = NULL;
- int i = 0;
- char key[64];
- int op_ret = 0;
- uint64_t cnt = 0;
-
- priv = this->private;
- shd = &priv->shd;
-
- ret = dict_get_int32 (input, "xl-op", (int32_t*)&op);
- if (ret)
- goto out;
- ret = dict_get_int32 (input, this->name, &xl_id);
- if (ret)
- goto out;
- ret = dict_set_int32 (output, this->name, xl_id);
- if (ret)
- goto out;
- switch (op) {
+ gf_xl_afr_op_t op = GF_SHD_OP_INVALID;
+ int ret = 0;
+ int xl_id = 0;
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ struct subvol_healer *healer = NULL;
+ int i = 0;
+ char key[64];
+ int keylen = 0;
+ int this_name_len = 0;
+ int op_ret = 0;
+ uint64_t cnt = 0;
+
+#define AFR_SET_DICT_AND_LOG(name, output, key, keylen, dict_str, \
+ dict_str_len) \
+ { \
+ int ret; \
+ \
+ ret = dict_set_nstrn(output, key, keylen, dict_str, dict_str_len); \
+ if (ret) { \
+ gf_smsg(name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED, \
+ "key=%s", key, "value=%s", dict_str, NULL); \
+ } \
+ }
+
+ priv = this->private;
+ shd = &priv->shd;
+
+ ret = dict_get_int32_sizen(input, "xl-op", (int32_t *)&op);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_GET_FAILED,
+ "key=xl-op", NULL);
+ goto out;
+ }
+ this_name_len = strlen(this->name);
+ ret = dict_get_int32n(input, this->name, this_name_len, &xl_id);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_GET_FAILED,
+ "key=%s", this->name, NULL);
+ goto out;
+ }
+ ret = dict_set_int32n(output, this->name, this_name_len, xl_id);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
+ "key=%s", this->name, NULL);
+ goto out;
+ }
+ switch (op) {
case GF_SHD_OP_HEAL_INDEX:
- op_ret = -1;
-
- for (i = 0; i < priv->child_count; i++) {
- healer = &shd->index_healers[i];
- snprintf (key, sizeof (key), "%d-%d-status", xl_id, i);
-
- if (!priv->child_up[i]) {
- ret = dict_set_str (output, key,
- "Brick is not connected");
- } else if (AFR_COUNT (priv->child_up,
- priv->child_count) < 2) {
- ret = dict_set_str (output, key,
- "< 2 bricks in replica are up");
- } else if (!afr_shd_is_subvol_local (this, healer->subvol)) {
- ret = dict_set_str (output, key,
- "Brick is remote");
- } else {
- ret = dict_set_str (output, key,
- "Started self-heal");
- afr_shd_index_healer_spawn (this, i);
- op_ret = 0;
- }
- }
- break;
+ op_ret = 0;
+
+ for (i = 0; i < priv->child_count; i++) {
+ healer = &shd->index_healers[i];
+ keylen = snprintf(key, sizeof(key), "%d-%d-status", xl_id, i);
+
+ if (!priv->child_up[i]) {
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
+ SBRICK_NOT_CONNECTED,
+ SLEN(SBRICK_NOT_CONNECTED));
+ op_ret = -1;
+ } else if (AFR_COUNT(priv->child_up, priv->child_count) < 2) {
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
+ SLESS_THAN2_BRICKS_in_REP,
+ SLEN(SLESS_THAN2_BRICKS_in_REP));
+ op_ret = -1;
+ } else if (!afr_shd_is_subvol_local(this, healer->subvol)) {
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
+ SBRICK_IS_REMOTE,
+ SLEN(SBRICK_IS_REMOTE));
+ } else {
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
+ SSTARTED_SELF_HEAL,
+ SLEN(SSTARTED_SELF_HEAL));
+
+ ret = afr_shd_index_healer_spawn(this, i);
+
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ AFR_MSG_HEALER_SPAWN_FAILED, NULL);
+ }
+ }
+ }
+ break;
case GF_SHD_OP_HEAL_FULL:
- op_ret = -1;
-
- for (i = 0; i < priv->child_count; i++) {
- healer = &shd->full_healers[i];
- snprintf (key, sizeof (key), "%d-%d-status", xl_id, i);
-
- if (!priv->child_up[i]) {
- ret = dict_set_str (output, key,
- "Brick is not connected");
- } else if (AFR_COUNT (priv->child_up,
- priv->child_count) < 2) {
- ret = dict_set_str (output, key,
- "< 2 bricks in replica are up");
- } else if (!afr_shd_is_subvol_local (this, healer->subvol)) {
- ret = dict_set_str (output, key,
- "Brick is remote");
- } else {
- ret = dict_set_str (output, key,
- "Started self-heal");
- afr_shd_full_healer_spawn (this, i);
- op_ret = 0;
- }
- }
- break;
- case GF_SHD_OP_INDEX_SUMMARY:
- for (i = 0; i < priv->child_count; i++)
- if (shd->index_healers[i].local)
- afr_shd_gather_index_entries (this, i, output);
- break;
- case GF_SHD_OP_HEALED_FILES:
- case GF_SHD_OP_HEAL_FAILED_FILES:
- for (i = 0; i < priv->child_count; i++) {
- snprintf (key, sizeof (key), "%d-%d-status", xl_id, i);
- ret = dict_set_str (output, key, "Operation Not "
- "Supported");
+ op_ret = -1;
+
+ for (i = 0; i < priv->child_count; i++) {
+ healer = &shd->full_healers[i];
+ keylen = snprintf(key, sizeof(key), "%d-%d-status", xl_id, i);
+
+ if (!priv->child_up[i]) {
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
+ SBRICK_NOT_CONNECTED,
+ SLEN(SBRICK_NOT_CONNECTED));
+ } else if (AFR_COUNT(priv->child_up, priv->child_count) < 2) {
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
+ SLESS_THAN2_BRICKS_in_REP,
+ SLEN(SLESS_THAN2_BRICKS_in_REP));
+ } else if (!afr_shd_is_subvol_local(this, healer->subvol)) {
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
+ SBRICK_IS_REMOTE,
+ SLEN(SBRICK_IS_REMOTE));
+ } else {
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
+ SSTARTED_SELF_HEAL,
+ SLEN(SSTARTED_SELF_HEAL));
+
+ ret = afr_shd_full_healer_spawn(this, i);
+
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ AFR_MSG_HEALER_SPAWN_FAILED, NULL);
+ }
+ op_ret = 0;
}
- break;
+ }
+ break;
+ case GF_SHD_OP_INDEX_SUMMARY:
+ /* this case has been handled in glfs-heal.c */
+ break;
case GF_SHD_OP_SPLIT_BRAIN_FILES:
- eh_dump (shd->split_brain, output, afr_add_shd_event);
- break;
+ eh_dump(shd->split_brain, output, afr_add_shd_event);
+ break;
case GF_SHD_OP_STATISTICS:
- for (i = 0; i < priv->child_count; i++) {
- eh_dump (shd->statistics[i], output,
- afr_add_crawl_event);
- afr_shd_dict_add_crawl_event (this, output,
- &shd->index_healers[i].crawl_event);
- afr_shd_dict_add_crawl_event (this, output,
- &shd->full_healers[i].crawl_event);
- }
- break;
+ for (i = 0; i < priv->child_count; i++) {
+ eh_dump(shd->statistics[i], output, afr_add_crawl_event);
+ ret = afr_shd_dict_add_crawl_event(
+ this, output, &shd->index_healers[i].crawl_event);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ AFR_MSG_ADD_CRAWL_EVENT_FAILED, NULL);
+ }
+
+ ret = afr_shd_dict_add_crawl_event(
+ this, output, &shd->full_healers[i].crawl_event);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ AFR_MSG_ADD_CRAWL_EVENT_FAILED, NULL);
+ }
+ }
+ break;
case GF_SHD_OP_STATISTICS_HEAL_COUNT:
case GF_SHD_OP_STATISTICS_HEAL_COUNT_PER_REPLICA:
- op_ret = -1;
-
- for (i = 0; i < priv->child_count; i++) {
- if (!priv->child_up[i]) {
- snprintf (key, sizeof (key), "%d-%d-status",
- xl_id, i);
- ret = dict_set_str (output, key,
- "Brick is not connected");
- } else {
- snprintf (key, sizeof (key), "%d-%d-hardlinks",
- xl_id, i);
- ret = afr_shd_get_index_count (this, i, &cnt);
- if (ret == 0) {
- ret = dict_set_uint64 (output, key, cnt);
- }
- op_ret = 0;
- }
- }
-
-// ret = _do_crawl_op_on_local_subvols (this, INDEX_TO_BE_HEALED,
-// STATISTICS_TO_BE_HEALED,
-// output);
- break;
+ op_ret = -1;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!priv->child_up[i]) {
+ keylen = snprintf(key, sizeof(key), "%d-%d-status", xl_id,
+ i);
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
+ SBRICK_NOT_CONNECTED,
+ SLEN(SBRICK_NOT_CONNECTED));
+ } else {
+ snprintf(key, sizeof(key), "%d-%d-hardlinks", xl_id, i);
+ ret = afr_shd_get_index_count(this, i, &cnt);
+ if (ret == 0) {
+ ret = dict_set_uint64(output, key, cnt);
+ }
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ AFR_MSG_DICT_SET_FAILED, NULL);
+ }
+ op_ret = 0;
+ }
+ }
+
+ break;
default:
- gf_log (this->name, GF_LOG_ERROR, "Unknown set op %d", op);
- break;
- }
+ gf_smsg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_ARG, "op=%d",
+ op, NULL);
+ break;
+ }
out:
- dict_del (output, this->name);
- return op_ret;
+ dict_deln(output, this->name, this_name_len);
+ return op_ret;
+
+#undef AFR_SET_DICT_AND_LOG
}
diff --git a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h
index 58b088e4cd7..18db728ea7b 100644
--- a/xlators/cluster/afr/src/afr-self-heald.h
+++ b/xlators/cluster/afr/src/afr-self-heald.h
@@ -8,70 +8,68 @@
cases as published by the Free Software Foundation.
*/
-
#ifndef _AFR_SELF_HEALD_H
#define _AFR_SELF_HEALD_H
#include <pthread.h>
-
typedef struct {
- int child;
- char *path;
+ char *path;
+ int child;
} shd_event_t;
typedef struct {
- int child;
- uint64_t healed_count;
- uint64_t split_brain_count;
- uint64_t heal_failed_count;
-
- /* If start_time is 0, it means crawler is not in progress
- and stats are not valid */
- time_t start_time;
- /* If start_time is NOT 0 and end_time is 0, it means
- cralwer is in progress */
- time_t end_time;
- char *crawl_type;
+ uint64_t healed_count;
+ uint64_t split_brain_count;
+ uint64_t heal_failed_count;
+
+ /* If start_time is 0, it means crawler is not in progress
+ and stats are not valid */
+ time_t start_time;
+ /* If start_time is NOT 0 and end_time is 0, it means
+ cralwer is in progress */
+ time_t end_time;
+ char *crawl_type;
+ int child;
} crawl_event_t;
struct subvol_healer {
- xlator_t *this;
- int subvol;
- gf_boolean_t local;
- gf_boolean_t running;
- gf_boolean_t rerun;
- crawl_event_t crawl_event;
- pthread_mutex_t mutex;
- pthread_cond_t cond;
- pthread_t thread;
+ xlator_t *this;
+ crawl_event_t crawl_event;
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+ pthread_t thread;
+ int subvol;
+ gf_boolean_t local;
+ gf_boolean_t running;
+ gf_boolean_t rerun;
};
typedef struct {
- gf_boolean_t iamshd;
- gf_boolean_t enabled;
- int timeout;
- struct subvol_healer *index_healers;
- struct subvol_healer *full_healers;
-
- eh_t *split_brain;
- eh_t **statistics;
+ struct subvol_healer *index_healers;
+ struct subvol_healer *full_healers;
+
+ eh_t *split_brain;
+ eh_t **statistics;
+ int timeout;
+ uint32_t max_threads;
+ uint32_t wait_qlength;
+ uint32_t halo_max_latency_msec;
+ gf_boolean_t iamshd;
+ gf_boolean_t enabled;
} afr_self_heald_t;
-
-int
-afr_selfheal_childup (xlator_t *this, int subvol);
-
int
-afr_selfheal_daemon_init (xlator_t *this);
+afr_selfheal_daemon_init(xlator_t *this);
int
-afr_xl_op (xlator_t *this, dict_t *input, dict_t *output);
+afr_xl_op(xlator_t *this, dict_t *input, dict_t *output);
int
-afr_shd_gfid_to_path (xlator_t *this, xlator_t *subvol, uuid_t gfid,
- char **path_p);
+afr_shd_gfid_to_path(xlator_t *this, xlator_t *subvol, uuid_t gfid,
+ char **path_p);
int
-afr_shd_index_purge (xlator_t *subvol, inode_t *inode, char *name);
+afr_shd_entry_purge(xlator_t *subvol, inode_t *inode, char *name,
+ ia_type_t type);
#endif /* !_AFR_SELF_HEALD_H */
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index d2111060035..a51f79b1f43 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -8,1868 +8,2920 @@
cases as published by the Free Software Foundation.
*/
-#include "dict.h"
-#include "byte-order.h"
-#include "common-utils.h"
-#include "timer.h"
+#include <glusterfs/dict.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/timer.h>
#include "afr.h"
#include "afr-transaction.h"
+#include "afr-self-heal.h"
+#include "afr-messages.h"
#include <signal.h>
+typedef enum {
+ AFR_TRANSACTION_PRE_OP,
+ AFR_TRANSACTION_POST_OP,
+} afr_xattrop_type_t;
+
+static void
+afr_lock_resume_shared(struct list_head *list);
+
+static void
+afr_post_op_handle_success(call_frame_t *frame, xlator_t *this);
+
+static void
+afr_post_op_handle_failure(call_frame_t *frame, xlator_t *this, int op_errno);
+
+void
+__afr_transaction_wake_shared(afr_local_t *local, struct list_head *shared);
+
+void
+afr_changelog_post_op_do(call_frame_t *frame, xlator_t *this);
+
+int
+afr_changelog_post_op_safe(call_frame_t *frame, xlator_t *this);
+
gf_boolean_t
-afr_changelog_pre_op_uninherit (call_frame_t *frame, xlator_t *this);
+afr_changelog_pre_op_uninherit(call_frame_t *frame, xlator_t *this);
gf_boolean_t
-afr_changelog_pre_op_update (call_frame_t *frame, xlator_t *this);
+afr_changelog_pre_op_update(call_frame_t *frame, xlator_t *this);
int
-afr_changelog_do (call_frame_t *frame, xlator_t *this, dict_t *xattr,
- afr_changelog_resume_t changelog_resume);
+afr_changelog_call_count(afr_transaction_type type,
+ unsigned char *pre_op_subvols,
+ unsigned char *failed_subvols,
+ unsigned int child_count);
+int
+afr_changelog_do(call_frame_t *frame, xlator_t *this, dict_t *xattr,
+ afr_changelog_resume_t changelog_resume,
+ afr_xattrop_type_t op);
+
+static void
+afr_ta_decide_post_op_state(call_frame_t *frame, xlator_t *this);
+
+static int
+afr_ta_post_op_do(void *opaque);
-static int32_t
-afr_quorum_errno (afr_private_t *priv)
+static int
+afr_ta_post_op_synctask(xlator_t *this, afr_local_t *local);
+
+static int
+afr_changelog_post_op_done(call_frame_t *frame, xlator_t *this);
+
+static void
+afr_changelog_post_op_fail(call_frame_t *frame, xlator_t *this, int op_errno);
+
+void
+afr_ta_locked_priv_invalidate(afr_private_t *priv)
{
- if (priv->quorum_reads)
- return ENOTCONN;
- return EROFS;
+ priv->ta_bad_child_index = AFR_CHILD_UNKNOWN;
+ priv->release_ta_notify_dom_lock = _gf_false;
+ priv->ta_notify_dom_lock_offset = 0;
+}
+
+static void
+afr_ta_process_waitq(xlator_t *this)
+{
+ afr_local_t *entry = NULL;
+ afr_private_t *priv = this->private;
+ struct list_head waitq = {
+ 0,
+ };
+
+ INIT_LIST_HEAD(&waitq);
+ LOCK(&priv->lock);
+ list_splice_init(&priv->ta_waitq, &waitq);
+ UNLOCK(&priv->lock);
+ list_for_each_entry(entry, &waitq, ta_waitq)
+ {
+ afr_ta_decide_post_op_state(entry->transaction.frame, this);
+ }
}
int
-__afr_txn_write_fop (call_frame_t *frame, xlator_t *this)
+afr_ta_lock_release_done(int ret, call_frame_t *ta_frame, void *opaque)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
+ afr_ta_process_waitq(ta_frame->this);
+ STACK_DESTROY(ta_frame->root);
+ return 0;
+}
- local = frame->local;
- priv = this->private;
+int
+afr_release_notify_lock_for_ta(void *opaque)
+{
+ xlator_t *this = NULL;
+ afr_private_t *priv = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct gf_flock flock = {
+ 0,
+ };
+ int ret = -1;
+
+ this = (xlator_t *)opaque;
+ priv = this->private;
+ ret = afr_fill_ta_loc(this, &loc, _gf_true);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Failed to populate loc for thin-arbiter.");
+ goto out;
+ }
+ flock.l_type = F_UNLCK;
+ flock.l_start = priv->ta_notify_dom_lock_offset;
+ flock.l_len = 1;
+ ret = syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX],
+ AFR_TA_DOM_NOTIFY, &loc, F_SETLK, &flock, NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Failed to unlock AFR_TA_DOM_NOTIFY lock.");
+ }
+
+ LOCK(&priv->lock);
+ {
+ afr_ta_locked_priv_invalidate(priv);
+ }
+ UNLOCK(&priv->lock);
+out:
+ loc_wipe(&loc);
+ return ret;
+}
- call_count = AFR_COUNT (local->transaction.pre_op, priv->child_count);
+void
+afr_zero_fill_stat(afr_local_t *local)
+{
+ if (!local)
+ return;
+ if (local->transaction.type == AFR_DATA_TRANSACTION ||
+ local->transaction.type == AFR_METADATA_TRANSACTION) {
+ gf_zero_fill_stat(&local->cont.inode_wfop.prebuf);
+ gf_zero_fill_stat(&local->cont.inode_wfop.postbuf);
+ } else if (local->transaction.type == AFR_ENTRY_TRANSACTION ||
+ local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION) {
+ gf_zero_fill_stat(&local->cont.dir_fop.buf);
+ gf_zero_fill_stat(&local->cont.dir_fop.preparent);
+ gf_zero_fill_stat(&local->cont.dir_fop.postparent);
+ if (local->transaction.type == AFR_ENTRY_TRANSACTION)
+ return;
+ gf_zero_fill_stat(&local->cont.dir_fop.prenewparent);
+ gf_zero_fill_stat(&local->cont.dir_fop.postnewparent);
+ }
+}
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
+/* In case of errors afr needs to choose which xdata from lower xlators it needs
+ * to unwind with. The way it is done is by checking if there are
+ * any good subvols which failed. Give preference to errnos other than
+ * ENOTCONN even if the child is source */
+void
+afr_pick_error_xdata(afr_local_t *local, afr_private_t *priv, inode_t *inode1,
+ unsigned char *readable1, inode_t *inode2,
+ unsigned char *readable2)
+{
+ int s = -1; /*selection*/
+ int i = 0;
+ unsigned char *readable = NULL;
+
+ if (local->xdata_rsp) {
+ dict_unref(local->xdata_rsp);
+ local->xdata_rsp = NULL;
+ }
+
+ readable = alloca0(priv->child_count * sizeof(*readable));
+ if (inode2 && readable2) { /*rename fop*/
+ AFR_INTERSECT(readable, readable1, readable2, priv->child_count);
+ } else {
+ memcpy(readable, readable1, sizeof(*readable) * priv->child_count);
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
+
+ if (local->replies[i].op_ret >= 0)
+ continue;
+
+ if (local->replies[i].op_errno == ENOTCONN)
+ continue;
+
+ /*Order is important in the following condition*/
+ if ((s < 0) || (!readable[s] && readable[i]))
+ s = i;
+ }
+
+ if (s != -1 && local->replies[s].xdata) {
+ local->xdata_rsp = dict_ref(local->replies[s].xdata);
+ } else if (s == -1) {
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
+
+ if (local->replies[i].op_ret >= 0)
+ continue;
+
+ if (!local->replies[i].xdata)
+ continue;
+ local->xdata_rsp = dict_ref(local->replies[i].xdata);
+ break;
}
+ }
+}
- local->call_count = call_count;
+gf_boolean_t
+afr_needs_changelog_update(afr_local_t *local)
+{
+ if (local->transaction.type == AFR_DATA_TRANSACTION)
+ return _gf_true;
+ if (!local->optimistic_change_log)
+ return _gf_true;
+ return _gf_false;
+}
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- local->transaction.wind (frame, this, i);
+gf_boolean_t
+afr_changelog_has_quorum(afr_local_t *local, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ int i = 0;
+ unsigned char *success_children = NULL;
- if (!--call_count)
- break;
- }
+ priv = this->private;
+ success_children = alloca0(priv->child_count);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->transaction.failed_subvols[i]) {
+ success_children[i] = 1;
}
+ }
- return 0;
+ if (afr_has_quorum(success_children, this, NULL)) {
+ return _gf_true;
+ }
+
+ return _gf_false;
}
+gf_boolean_t
+afr_is_write_subvol_valid(call_frame_t *frame, xlator_t *this)
+{
+ int i = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ uint64_t write_subvol = 0;
+ unsigned char *writable = NULL;
+ uint16_t datamap = 0;
+
+ local = frame->local;
+ priv = this->private;
+ writable = alloca0(priv->child_count);
+
+ write_subvol = afr_write_subvol_get(frame, this);
+ datamap = (write_subvol & 0x00000000ffff0000) >> 16;
+ for (i = 0; i < priv->child_count; i++) {
+ if (datamap & (1 << i))
+ writable[i] = 1;
+
+ if (writable[i] && !local->transaction.failed_subvols[i])
+ return _gf_true;
+ }
+
+ return _gf_false;
+}
int
-__afr_txn_write_done (call_frame_t *frame, xlator_t *this)
+afr_transaction_fop(call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ unsigned char *failed_subvols = NULL;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ failed_subvols = local->transaction.failed_subvols;
+ call_count = priv->child_count -
+ AFR_COUNT(failed_subvols, priv->child_count);
+ /* Fail if pre-op did not succeed on quorum no. of bricks. */
+ if (!afr_changelog_has_quorum(local, this) || !call_count) {
+ local->op_ret = -1;
+ /* local->op_errno is already captured in changelog cbk. */
+ afr_transaction_resume(frame, this);
+ return 0;
+ }
- local = frame->local;
+ /* Fail if at least one writeable brick isn't up.*/
+ if (local->transaction.type == AFR_DATA_TRANSACTION &&
+ !afr_is_write_subvol_valid(frame, this)) {
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ afr_transaction_resume(frame, this);
+ return 0;
+ }
- local->transaction.unwind (frame, this);
+ local->call_count = call_count;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i] && !failed_subvols[i]) {
+ local->transaction.wind(frame, this, i);
- AFR_STACK_DESTROY (frame);
+ if (!--call_count)
+ break;
+ }
+ }
- return 0;
+ return 0;
}
-
-call_frame_t*
-afr_transaction_detach_fop_frame (call_frame_t *frame)
+int
+afr_transaction_done(call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- call_frame_t *fop_frame = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ gf_boolean_t unwind = _gf_false;
+ afr_lock_t *lock = NULL;
+ afr_local_t *lock_local = NULL;
- local = frame->local;
+ priv = this->private;
+ local = frame->local;
- LOCK (&frame->lock);
+ if (priv->consistent_metadata) {
+ LOCK(&frame->lock);
{
- fop_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
+ unwind = (local->transaction.main_frame != NULL);
}
- UNLOCK (&frame->lock);
-
- return fop_frame;
+ UNLOCK(&frame->lock);
+ if (unwind) /*It definitely did post-op*/
+ afr_zero_fill_stat(local);
+ }
+
+ if (local->transaction.do_eager_unlock) {
+ lock = &local->inode_ctx->lock[local->transaction.type];
+ LOCK(&local->inode->lock);
+ {
+ lock->acquired = _gf_false;
+ lock->release = _gf_false;
+ list_splice_init(&lock->frozen, &lock->waiting);
+ if (list_empty(&lock->waiting))
+ goto unlock;
+ lock_local = list_entry(lock->waiting.next, afr_local_t,
+ transaction.wait_list);
+ list_del_init(&lock_local->transaction.wait_list);
+ list_add(&lock_local->transaction.owner_list, &lock->owners);
+ }
+ unlock:
+ UNLOCK(&local->inode->lock);
+ }
+ if (lock_local) {
+ afr_lock(lock_local->transaction.frame,
+ lock_local->transaction.frame->this);
+ }
+ local->transaction.unwind(frame, this);
+
+ GF_ASSERT(list_empty(&local->transaction.owner_list));
+ GF_ASSERT(list_empty(&local->transaction.wait_list));
+ AFR_STACK_DESTROY(frame);
+
+ return 0;
}
-
static void
-afr_save_lk_owner (call_frame_t *frame)
+afr_lock_fail_shared(afr_local_t *local, struct list_head *list)
{
- afr_local_t * local = NULL;
-
- local = frame->local;
-
- local->saved_lk_owner = frame->root->lk_owner;
+ afr_local_t *each = NULL;
+
+ while (!list_empty(list)) {
+ each = list_entry(list->next, afr_local_t, transaction.wait_list);
+ list_del_init(&each->transaction.wait_list);
+ each->op_ret = -1;
+ each->op_errno = local->op_errno;
+ afr_transaction_done(each->transaction.frame,
+ each->transaction.frame->this);
+ }
}
-
static void
-afr_restore_lk_owner (call_frame_t *frame)
+afr_handle_lock_acquire_failure(afr_local_t *local)
{
- afr_local_t * local = NULL;
-
- local = frame->local;
+ struct list_head shared;
+ afr_lock_t *lock = NULL;
- frame->root->lk_owner = local->saved_lk_owner;
-}
+ if (!local->transaction.eager_lock_on)
+ goto out;
-void
-__mark_all_success (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int i;
+ lock = &local->inode_ctx->lock[local->transaction.type];
- local = frame->local;
- priv = this->private;
+ INIT_LIST_HEAD(&shared);
+ LOCK(&local->inode->lock);
+ {
+ lock->release = _gf_true;
+ list_splice_init(&lock->waiting, &shared);
+ }
+ UNLOCK(&local->inode->lock);
- for (i = 0; i < priv->child_count; i++) {
- local->transaction.failed_subvols[i] = 0;
- }
+ afr_lock_fail_shared(local, &shared);
+ local->transaction.do_eager_unlock = _gf_true;
+out:
+ local->internal_lock.lock_cbk = afr_transaction_done;
+ afr_unlock(local->transaction.frame, local->transaction.frame->this);
}
-
-int
-afr_transaction_perform_fop (call_frame_t *frame, xlator_t *this)
+call_frame_t *
+afr_transaction_detach_fop_frame(call_frame_t *frame)
{
- afr_local_t *local = NULL;
- fd_t *fd = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *fop_frame = NULL;
- local = frame->local;
- fd = local->fd;
+ local = frame->local;
- /* Perform fops with the lk-owner from top xlator.
- * Eg: lk-owner of posix-lk and flush should be same,
- * flush cant clear the posix-lks without that lk-owner.
- */
- afr_save_lk_owner (frame);
- frame->root->lk_owner =
- local->transaction.main_frame->root->lk_owner;
-
- if (local->pre_op_compat)
- /* old mode, pre-op was done as afr_changelog_do()
- just now, before OP */
- afr_changelog_pre_op_update (frame, this);
-
- /* The wake up needs to happen independent of
- what type of fop arrives here. If it was
- a write, then it has already inherited the
- lock and changelog. If it was not a write,
- then the presumption of the optimization (of
- optimizing for successive write operations)
- fails.
- */
- if (fd)
- afr_delayed_changelog_wake_up (this, fd);
- local->transaction.fop (frame, this);
+ afr_handle_inconsistent_fop(frame, &local->op_ret, &local->op_errno);
+ LOCK(&frame->lock);
+ {
+ fop_frame = local->transaction.main_frame;
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK(&frame->lock);
- return 0;
+ return fop_frame;
}
-
-static int
-__changelog_enabled (afr_private_t *priv, afr_transaction_type type)
+static void
+afr_save_lk_owner(call_frame_t *frame)
{
- int ret = 0;
+ afr_local_t *local = NULL;
- switch (type) {
- case AFR_DATA_TRANSACTION:
- if (priv->data_change_log)
- ret = 1;
+ local = frame->local;
- break;
+ local->saved_lk_owner = frame->root->lk_owner;
+}
- case AFR_METADATA_TRANSACTION:
- if (priv->metadata_change_log)
- ret = 1;
+static void
+afr_restore_lk_owner(call_frame_t *frame)
+{
+ afr_local_t *local = NULL;
- break;
+ local = frame->local;
- case AFR_ENTRY_TRANSACTION:
- case AFR_ENTRY_RENAME_TRANSACTION:
- if (priv->entry_change_log)
- ret = 1;
+ frame->root->lk_owner = local->saved_lk_owner;
+}
- break;
- }
+void
+__mark_all_success(call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int i;
- return ret;
-}
+ local = frame->local;
+ priv = this->private;
+ for (i = 0; i < priv->child_count; i++) {
+ local->transaction.failed_subvols[i] = 0;
+ }
+}
-static int
-__fop_changelog_needed (call_frame_t *frame, xlator_t *this)
+void
+afr_compute_pre_op_sources(call_frame_t *frame, xlator_t *this)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- int op_ret = 0;
- afr_transaction_type type = -1;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_transaction_type type = -1;
+ dict_t *xdata = NULL;
+ int **matrix = NULL;
+ int idx = -1;
+ int i = 0;
+ int j = 0;
+
+ priv = this->private;
+ local = frame->local;
+ type = local->transaction.type;
+ idx = afr_index_for_transaction_type(type);
+ matrix = ALLOC_MATRIX(priv->child_count, int);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->transaction.changelog_xdata[i])
+ continue;
+ xdata = local->transaction.changelog_xdata[i];
+ afr_selfheal_fill_matrix(this, matrix, i, idx, xdata);
+ }
+
+ memset(local->transaction.pre_op_sources, 1, priv->child_count);
+
+ /*If lock or pre-op failed on a brick, it is not a source. */
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.failed_subvols[i])
+ local->transaction.pre_op_sources[i] = 0;
+ }
+
+ /* If brick is blamed by others, it is not a source. */
+ for (i = 0; i < priv->child_count; i++)
+ for (j = 0; j < priv->child_count; j++)
+ if (matrix[i][j] != 0)
+ local->transaction.pre_op_sources[j] = 0;
+}
- priv = this->private;
- local = frame->local;
- type = local->transaction.type;
+void
+afr_txn_arbitrate_fop(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int pre_op_sources_count = 0;
+ int i = 0;
- if (__changelog_enabled (priv, type)) {
- switch (local->op) {
+ priv = this->private;
+ local = frame->local;
- case GF_FOP_WRITE:
- case GF_FOP_FTRUNCATE:
- op_ret = 1;
- break;
+ afr_compute_pre_op_sources(frame, this);
+ pre_op_sources_count = AFR_COUNT(local->transaction.pre_op_sources,
+ priv->child_count);
- case GF_FOP_FLUSH:
- op_ret = 0;
- break;
+ /* If arbiter is the only source, do not proceed. */
+ if (pre_op_sources_count < 2 &&
+ local->transaction.pre_op_sources[ARBITER_BRICK_INDEX]) {
+ local->op_ret = -1;
+ local->op_errno = ENOTCONN;
+ for (i = 0; i < priv->child_count; i++)
+ local->transaction.failed_subvols[i] = 1;
+ }
- default:
- op_ret = 1;
- }
- }
+ afr_transaction_fop(frame, this);
- return op_ret;
+ return;
}
-
int
-afr_set_pending_dict (afr_private_t *priv, dict_t *xattr, int **pending)
+afr_transaction_perform_fop(call_frame_t *frame, xlator_t *this)
{
- int i = 0;
- int ret = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int ret = 0;
+ int failure_count = 0;
+ struct list_head shared;
+ afr_lock_t *lock = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ INIT_LIST_HEAD(&shared);
+ if (local->transaction.type == AFR_DATA_TRANSACTION &&
+ !local->transaction.inherited) {
+ ret = afr_write_subvol_set(frame, this);
+ if (ret) {
+ /*act as if operation failed on all subvols*/
+ local->op_ret = -1;
+ local->op_errno = -ret;
+ for (i = 0; i < priv->child_count; i++)
+ local->transaction.failed_subvols[i] = 1;
+ }
+ }
+
+ if (local->pre_op_compat)
+ /* old mode, pre-op was done as afr_changelog_do()
+ just now, before OP */
+ afr_changelog_pre_op_update(frame, this);
+
+ if (!local->transaction.eager_lock_on || local->transaction.inherited)
+ goto fop;
+ failure_count = AFR_COUNT(local->transaction.failed_subvols,
+ priv->child_count);
+ if (failure_count == priv->child_count) {
+ afr_handle_lock_acquire_failure(local);
+ return 0;
+ } else {
+ lock = &local->inode_ctx->lock[local->transaction.type];
+ LOCK(&local->inode->lock);
+ {
+ lock->acquired = _gf_true;
+ __afr_transaction_wake_shared(local, &shared);
+ }
+ UNLOCK(&local->inode->lock);
+ }
+
+fop:
+ /* Perform fops with the lk-owner from top xlator.
+ * Eg: lk-owner of posix-lk and flush should be same,
+ * flush cant clear the posix-lks without that lk-owner.
+ */
+ afr_save_lk_owner(frame);
+ frame->root->lk_owner = local->transaction.main_frame->root->lk_owner;
+
+ if (priv->arbiter_count == 1) {
+ afr_txn_arbitrate_fop(frame, this);
+ } else {
+ afr_transaction_fop(frame, this);
+ }
+
+ afr_lock_resume_shared(&shared);
+ return 0;
+}
- for (i = 0; i < priv->child_count; i++) {
+int
+afr_set_pending_dict(afr_private_t *priv, dict_t *xattr, int **pending)
+{
+ int i = 0;
+ int ret = 0;
- ret = dict_set_static_bin (xattr, priv->pending_key[i],
- pending[i],
- AFR_NUM_CHANGE_LOGS * sizeof (int));
- /* 3 = data+metadata+entry */
+ for (i = 0; i < priv->child_count; i++) {
+ ret = dict_set_static_bin(xattr, priv->pending_key[i], pending[i],
+ AFR_NUM_CHANGE_LOGS * sizeof(int));
+ /* 3 = data+metadata+entry */
- if (ret)
- break;
- }
+ if (ret)
+ break;
+ }
- return ret;
+ return ret;
}
-int
-afr_lock_server_count (afr_private_t *priv, afr_transaction_type type)
+static void
+afr_ta_dom_lock_check_and_release(afr_ta_fop_state_t fop_state, xlator_t *this)
{
- int ret = 0;
-
- switch (type) {
- case AFR_DATA_TRANSACTION:
- ret = priv->child_count;
+ afr_private_t *priv = this->private;
+ unsigned int inmem_count = 0;
+ unsigned int onwire_count = 0;
+ gf_boolean_t release = _gf_false;
+
+ LOCK(&priv->lock);
+ {
+ /*Once we get notify lock release upcall notification,
+ if any of the fop state counters are non-zero, we will
+ not release the lock.
+ */
+ onwire_count = priv->ta_on_wire_txn_count;
+ inmem_count = priv->ta_in_mem_txn_count;
+ switch (fop_state) {
+ case TA_GET_INFO_FROM_TA_FILE:
+ onwire_count = --priv->ta_on_wire_txn_count;
break;
-
- case AFR_METADATA_TRANSACTION:
- ret = priv->child_count;
+ case TA_INFO_IN_MEMORY_SUCCESS:
+ case TA_INFO_IN_MEMORY_FAILED:
+ inmem_count = --priv->ta_in_mem_txn_count;
break;
-
- case AFR_ENTRY_TRANSACTION:
- case AFR_ENTRY_RENAME_TRANSACTION:
- ret = priv->child_count;
+ case TA_WAIT_FOR_NOTIFY_LOCK_REL:
+ GF_ASSERT(0);
+ break;
+ case TA_SUCCESS:
break;
}
+ release = priv->release_ta_notify_dom_lock;
+ }
+ UNLOCK(&priv->lock);
- return ret;
-}
+ if (inmem_count != 0 || release == _gf_false || onwire_count != 0)
+ return;
-/* {{{ pending */
+ afr_ta_lock_release_synctask(this);
+}
+static void
+afr_ta_process_onwireq(afr_ta_fop_state_t fop_state, xlator_t *this)
+{
+ afr_private_t *priv = this->private;
+ afr_local_t *entry = NULL;
+ int bad_child = AFR_CHILD_UNKNOWN;
+
+ struct list_head onwireq = {
+ 0,
+ };
+ INIT_LIST_HEAD(&onwireq);
+
+ LOCK(&priv->lock);
+ {
+ bad_child = priv->ta_bad_child_index;
+ if (bad_child == AFR_CHILD_UNKNOWN) {
+ /*The previous on-wire ta_post_op was a failure. Just dequeue
+ *one element to wind on-wire again. */
+ entry = list_entry(priv->ta_onwireq.next, afr_local_t, ta_onwireq);
+ list_del_init(&entry->ta_onwireq);
+ } else {
+ /* Prepare to process all fops based on bad_child_index. */
+ list_splice_init(&priv->ta_onwireq, &onwireq);
+ }
+ }
+ UNLOCK(&priv->lock);
+
+ if (entry) {
+ afr_ta_post_op_synctask(this, entry);
+ return;
+ } else {
+ while (!list_empty(&onwireq)) {
+ entry = list_entry(onwireq.next, afr_local_t, ta_onwireq);
+ list_del_init(&entry->ta_onwireq);
+ if (entry->ta_failed_subvol == bad_child) {
+ afr_post_op_handle_success(entry->transaction.frame, this);
+ } else {
+ afr_post_op_handle_failure(entry->transaction.frame, this, EIO);
+ }
+ }
+ }
+}
int
-afr_changelog_post_op_done (call_frame_t *frame, xlator_t *this)
+afr_changelog_post_op_done(call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ afr_private_t *priv = NULL;
- local = frame->local;
- priv = this->private;
- int_lock = &local->internal_lock;
+ local = frame->local;
+ priv = this->private;
+ int_lock = &local->internal_lock;
- if (local->transaction.resume_stub) {
- call_resume (local->transaction.resume_stub);
- local->transaction.resume_stub = NULL;
- }
+ if (priv->thin_arbiter_count) {
+ /*fop should not come here with TA_WAIT_FOR_NOTIFY_LOCK_REL state */
+ afr_ta_dom_lock_check_and_release(local->fop_state, this);
+ }
- if (afr_lock_server_count (priv, local->transaction.type) == 0) {
- local->transaction.done (frame, this);
- } else {
- int_lock->lock_cbk = local->transaction.done;
- afr_unlock (frame, this);
- }
+ /* Fail the FOP if post-op did not succeed on quorum no. of bricks. */
+ if (!afr_changelog_has_quorum(local, this)) {
+ local->op_ret = -1;
+ /*local->op_errno is already captured in changelog cbk*/
+ }
- return 0;
-}
+ if (local->transaction.resume_stub) {
+ call_resume(local->transaction.resume_stub);
+ local->transaction.resume_stub = NULL;
+ }
+ int_lock->lock_cbk = afr_transaction_done;
+ afr_unlock(frame, this);
-afr_inodelk_t*
-afr_get_inodelk (afr_internal_lock_t *int_lock, char *dom)
-{
- afr_inodelk_t *inodelk = NULL;
- int i = 0;
-
- for (i = 0; int_lock->inodelk[i].domain; i++) {
- inodelk = &int_lock->inodelk[i];
- if (strcmp (dom, inodelk->domain) == 0)
- return inodelk;
- }
- return NULL;
+ return 0;
}
-unsigned char*
-afr_locked_nodes_get (afr_transaction_type type, afr_internal_lock_t *int_lock)
+static void
+afr_changelog_post_op_fail(call_frame_t *frame, xlator_t *this, int op_errno)
{
- unsigned char *locked_nodes = NULL;
- afr_inodelk_t *inodelk = NULL;
- switch (type) {
- case AFR_DATA_TRANSACTION:
- case AFR_METADATA_TRANSACTION:
- inodelk = afr_get_inodelk (int_lock, int_lock->domain);
- locked_nodes = inodelk->locked_nodes;
- break;
+ afr_local_t *local = frame->local;
+ local->op_ret = -1;
+ local->op_errno = op_errno;
- case AFR_ENTRY_TRANSACTION:
- case AFR_ENTRY_RENAME_TRANSACTION:
- /*Because same set of subvols participate in all lockee
- * entities*/
- locked_nodes = int_lock->lockee[0].locked_nodes;
- break;
- }
- return locked_nodes;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, AFR_MSG_THIN_ARB,
+ "Failing %s for gfid %s. Fop state is:%d", gf_fop_list[local->op],
+ uuid_utoa(local->inode->gfid), local->fop_state);
+
+ afr_changelog_post_op_done(frame, this);
}
+unsigned char *
+afr_locked_nodes_get(afr_transaction_type type, afr_internal_lock_t *int_lock)
+{
+ /*Because same set of subvols participate in all lockee
+ * entities*/
+ return int_lock->lockee[0].locked_nodes;
+}
int
-afr_changelog_call_count (afr_transaction_type type,
- unsigned char *pre_op_subvols,
- unsigned int child_count)
+afr_changelog_call_count(afr_transaction_type type,
+ unsigned char *pre_op_subvols,
+ unsigned char *failed_subvols,
+ unsigned int child_count)
{
- int call_count = 0;
+ int i = 0;
+ int call_count = 0;
- call_count = AFR_COUNT(pre_op_subvols, child_count);
+ for (i = 0; i < child_count; i++) {
+ if (pre_op_subvols[i] && !failed_subvols[i]) {
+ call_count++;
+ }
+ }
- if (type == AFR_ENTRY_RENAME_TRANSACTION)
- call_count *= 2;
+ if (type == AFR_ENTRY_RENAME_TRANSACTION)
+ call_count *= 2;
- return call_count;
+ return call_count;
}
-
gf_boolean_t
-afr_txn_nothing_failed (call_frame_t *frame, xlator_t *this)
+afr_txn_nothing_failed(call_frame_t *frame, xlator_t *this)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.failed_subvols[i])
- return _gf_false;
- }
-
- return _gf_true;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ if (priv->thin_arbiter_count) {
+ /* We need to perform post-op even if 1 data brick was down
+ * before the txn started.*/
+ if (AFR_COUNT(local->transaction.failed_subvols, priv->child_count))
+ return _gf_false;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i] &&
+ local->transaction.failed_subvols[i])
+ return _gf_false;
+ }
+
+ return _gf_true;
}
-
void
-afr_handle_symmetric_errors (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int op_errno = 0;
- int i_errno = 0;
- gf_boolean_t matching_errors = _gf_true;
- int i = 0;
-
- priv = this->private;
- local = frame->local;
-
- for (i = 0; i < priv->child_count; i++) {
- if (!local->replies[i].valid)
- continue;
- if (local->replies[i].op_ret != -1) {
- /* Operation succeeded on at least on subvol,
- so it is not a failed-everywhere situation.
- */
- matching_errors = _gf_false;
- break;
- }
- i_errno = local->replies[i].op_errno;
-
- if (i_errno == ENOTCONN) {
- /* ENOTCONN is not a symmetric error. We do not
- know if the operation was performed on the
- backend or not.
- */
- matching_errors = _gf_false;
- break;
- }
-
- if (!op_errno) {
- op_errno = i_errno;
- } else if (op_errno != i_errno) {
- /* Mismatching op_errno's */
- matching_errors = _gf_false;
- break;
- }
- }
-
- if (matching_errors)
- __mark_all_success (frame, this);
+afr_handle_symmetric_errors(call_frame_t *frame, xlator_t *this)
+{
+ if (afr_is_symmetric_error(frame, this))
+ __mark_all_success(frame, this);
}
gf_boolean_t
-afr_has_quorum (unsigned char *subvols, xlator_t *this)
+afr_has_quorum(unsigned char *subvols, xlator_t *this, call_frame_t *frame)
{
- unsigned int quorum_count = 0;
- afr_private_t *priv = NULL;
- unsigned int up_children_count = 0;
+ unsigned int quorum_count = 0;
+ afr_private_t *priv = NULL;
+ unsigned int up_children_count = 0;
- priv = this->private;
- up_children_count = AFR_COUNT (subvols, priv->child_count);
+ priv = this->private;
+ up_children_count = AFR_COUNT(subvols, priv->child_count);
- if (priv->quorum_count == AFR_QUORUM_AUTO) {
+ if (afr_lookup_has_quorum(frame, up_children_count))
+ return _gf_true;
+
+ if (priv->quorum_count == AFR_QUORUM_AUTO) {
/*
- * Special case for even numbers of nodes in auto-quorum:
- * if we have exactly half children up
- * and that includes the first ("senior-most") node, then that counts
- * as quorum even if it wouldn't otherwise. This supports e.g. N=2
- * while preserving the critical property that there can only be one
- * such group.
+ * Special case for auto-quorum with an even number of nodes.
+ *
+ * A replica set with even count N can only handle the same
+ * number of failures as odd N-1 before losing "vanilla"
+ * quorum, and the probability of more simultaneous failures is
+ * actually higher. For example, with a 1% chance of failure
+ * we'd have a 0.03% chance of two simultaneous failures with
+ * N=3 but a 0.06% chance with N=4. However, the special case
+ * is necessary for N=2 because there's no real quorum in that
+ * case (i.e. can't normally survive *any* failures). In that
+ * case, we treat the first node as a tie-breaker, allowing
+ * quorum to be retained in some cases while still honoring the
+ * all-important constraint that there can not simultaneously
+ * be two partitioned sets of nodes each believing they have
+ * quorum. Of two equally sized sets, the one without that
+ * first node will lose.
+ *
+ * It turns out that the special case is beneficial for higher
+ * values of N as well. Continuing the example above, the
+ * probability of losing quorum with N=4 and this type of
+ * quorum is (very) slightly lower than with N=3 and vanilla
+ * quorum. The difference becomes even more pronounced with
+ * higher N. Therefore, even though such replica counts are
+ * unlikely to be seen in practice, we might as well use the
+ * "special" quorum then as well.
*/
- if ((priv->child_count % 2 == 0) &&
- (up_children_count == (priv->child_count/2)))
- return subvols[0];
+ if ((up_children_count * 2) == priv->child_count) {
+ return subvols[0];
}
+ }
- if (priv->quorum_count == AFR_QUORUM_AUTO) {
- quorum_count = priv->child_count/2 + 1;
- } else {
- quorum_count = priv->quorum_count;
- }
+ if (priv->quorum_count == AFR_QUORUM_AUTO) {
+ quorum_count = priv->child_count / 2 + 1;
+ } else {
+ quorum_count = priv->quorum_count;
+ }
- if (up_children_count >= quorum_count)
- return _gf_true;
+ if (up_children_count >= quorum_count)
+ return _gf_true;
- return _gf_false;
+ return _gf_false;
}
static gf_boolean_t
-afr_has_fop_quorum (call_frame_t *frame)
+afr_has_fop_quorum(call_frame_t *frame)
{
- xlator_t *this = frame->this;
- afr_local_t *local = frame->local;
- unsigned char *locked_nodes = NULL;
+ xlator_t *this = frame->this;
+ afr_local_t *local = frame->local;
+ unsigned char *locked_nodes = NULL;
- locked_nodes = afr_locked_nodes_get (local->transaction.type,
- &local->internal_lock);
- return afr_has_quorum (locked_nodes, this);
+ locked_nodes = afr_locked_nodes_get(local->transaction.type,
+ &local->internal_lock);
+ return afr_has_quorum(locked_nodes, this, NULL);
}
static gf_boolean_t
-afr_has_fop_cbk_quorum (call_frame_t *frame)
+afr_has_fop_cbk_quorum(call_frame_t *frame)
{
- afr_local_t *local = frame->local;
- xlator_t *this = frame->this;
- afr_private_t *priv = this->private;
- unsigned char *success = alloca0(priv->child_count);
- int i = 0;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i])
- if (!local->transaction.failed_subvols[i])
- success[i] = 1;
- }
-
- return afr_has_quorum (success, this);
+ afr_local_t *local = frame->local;
+ xlator_t *this = frame->this;
+ afr_private_t *priv = this->private;
+ unsigned char *success = alloca0(priv->child_count);
+ int i = 0;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i])
+ if (!local->transaction.failed_subvols[i])
+ success[i] = 1;
+ }
+
+ return afr_has_quorum(success, this, NULL);
}
-void
-afr_handle_quorum (call_frame_t *frame)
+gf_boolean_t
+afr_need_dirty_marking(call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- const char *file = NULL;
- uuid_t gfid = {0};
+ afr_private_t *priv = this->private;
+ afr_local_t *local = NULL;
+ gf_boolean_t need_dirty = _gf_false;
- local = frame->local;
- priv = frame->this->private;
+ local = frame->local;
- if (priv->quorum_count == 0)
- return;
+ if (!priv->quorum_count || !local->optimistic_change_log)
+ return _gf_false;
- /* If the fop already failed return right away to preserve errno */
- if (local->op_ret == -1)
- return;
+ if (local->transaction.type == AFR_DATA_TRANSACTION ||
+ local->transaction.type == AFR_METADATA_TRANSACTION)
+ return _gf_false;
- /*
- * Network split may happen just after the fops are unwound, so check
- * if the fop succeeded in a way it still follows quorum. If it doesn't,
- * mark the fop as failure, mark the changelogs so it reflects that
- * failure.
- *
- * Scenario:
- * There are 3 mounts on 3 machines(node1, node2, node3) all writing to
- * single file. Network split happened in a way that node1 can't see
- * node2, node3. Node2, node3 both of them can't see node1. Now at the
- * time of sending write all the bricks are up. Just after write fop is
- * wound on node1, network split happens. Node1 thinks write fop failed
- * on node2, node3 so marks pending changelog for those 2 extended
- * attributes on node1. Node2, node3 thinks writes failed on node1 so
- * they mark pending changelog for node1. When the network is stable
- * again the file already is in split-brain. These checks prevent
- * marking pending changelog on other subvolumes if the fop doesn't
- * succeed in a way it is still following quorum. So with this fix what
- * is happening is, node1 will have all pending changelog(FOOL) because
- * the write succeeded only on node1 but failed on node2, node3 so
- * instead of marking pending changelogs on node2, node3 it just treats
- * the fop as failure and goes into DIRTY state. Where as node2, node3
- * say they are sources and have pending changelog to node1 so there is
- * no split-brain with the fix. The problem is eliminated completely.
- */
+ if (AFR_COUNT(local->transaction.failed_subvols, priv->child_count) ==
+ priv->child_count)
+ return _gf_false;
- if (afr_has_fop_cbk_quorum (frame))
- return;
+ if (!afr_has_fop_cbk_quorum(frame))
+ need_dirty = _gf_true;
- if (local->fd) {
- gf_uuid_copy (gfid, local->fd->inode->gfid);
- file = uuid_utoa (gfid);
- } else {
- loc_path (&local->loc, local->loc.name);
- file = local->loc.path;
- }
+ return need_dirty;
+}
- gf_msg (frame->this->name, GF_LOG_WARNING, 0, AFR_MSG_QUORUM_FAIL,
- "%s: Failing %s as quorum is not met",
- file, gf_fop_list[local->op]);
+void
+afr_handle_quorum(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ const char *file = NULL;
+ uuid_t gfid = {0};
+
+ local = frame->local;
+ priv = frame->this->private;
+
+ if (priv->quorum_count == 0)
+ return;
+
+ /* If the fop already failed return right away to preserve errno */
+ if (local->op_ret == -1)
+ return;
+
+ /*
+ * Network split may happen just after the fops are unwound, so check
+ * if the fop succeeded in a way it still follows quorum. If it doesn't,
+ * mark the fop as failure, mark the changelogs so it reflects that
+ * failure.
+ *
+ * Scenario:
+ * There are 3 mounts on 3 machines(node1, node2, node3) all writing to
+ * single file. Network split happened in a way that node1 can't see
+ * node2, node3. Node2, node3 both of them can't see node1. Now at the
+ * time of sending write all the bricks are up. Just after write fop is
+ * wound on node1, network split happens. Node1 thinks write fop failed
+ * on node2, node3 so marks pending changelog for those 2 extended
+ * attributes on node1. Node2, node3 thinks writes failed on node1 so
+ * they mark pending changelog for node1. When the network is stable
+ * again the file already is in split-brain. These checks prevent
+ * marking pending changelog on other subvolumes if the fop doesn't
+ * succeed in a way it is still following quorum. So with this fix what
+ * is happening is, node1 will have all pending changelog(FOOL) because
+ * the write succeeded only on node1 but failed on node2, node3 so
+ * instead of marking pending changelogs on node2, node3 it just treats
+ * the fop as failure and goes into DIRTY state. Where as node2, node3
+ * say they are sources and have pending changelog to node1 so there is
+ * no split-brain with the fix. The problem is eliminated completely.
+ */
+
+ if (afr_has_fop_cbk_quorum(frame))
+ return;
+
+ if (afr_need_dirty_marking(frame, this))
+ goto set_response;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i])
+ afr_transaction_fop_failed(frame, frame->this, i);
+ }
+
+set_response:
+ local->op_ret = -1;
+ local->op_errno = afr_final_errno(local, priv);
+ if (local->op_errno == 0)
+ local->op_errno = afr_quorum_errno(priv);
+
+ if (local->fd) {
+ gf_uuid_copy(gfid, local->fd->inode->gfid);
+ file = uuid_utoa(gfid);
+ } else {
+ loc_path(&local->loc, local->loc.name);
+ file = local->loc.path;
+ }
+
+ gf_msg(frame->this->name, GF_LOG_WARNING, local->op_errno,
+ AFR_MSG_QUORUM_FAIL, "%s: Failing %s as quorum is not met", file,
+ gf_fop_list[local->op]);
+
+ switch (local->transaction.type) {
+ case AFR_ENTRY_TRANSACTION:
+ case AFR_ENTRY_RENAME_TRANSACTION:
+ afr_pick_error_xdata(local, priv, local->parent, local->readable,
+ local->parent2, local->readable2);
+ break;
+ default:
+ afr_pick_error_xdata(local, priv, local->inode, local->readable,
+ NULL, NULL);
+ break;
+ }
+}
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i])
- afr_transaction_fop_failed (frame, frame->this, i);
- }
+int
+afr_fill_ta_loc(xlator_t *this, loc_t *loc, gf_boolean_t is_gfid_based_fop)
+{
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ loc->parent = inode_ref(priv->root_inode);
+ gf_uuid_copy(loc->pargfid, loc->parent->gfid);
+ loc->name = priv->pending_key[THIN_ARBITER_BRICK_INDEX];
+ if (is_gfid_based_fop && gf_uuid_is_null(priv->ta_gfid)) {
+ /* Except afr_ta_id_file_check() which is path based, all other gluster
+ * FOPS need gfid.*/
+ return -EINVAL;
+ }
+ gf_uuid_copy(loc->gfid, priv->ta_gfid);
+ loc->inode = inode_new(loc->parent->table);
+ if (!loc->inode) {
+ loc_wipe(loc);
+ return -ENOMEM;
+ }
+ return 0;
+}
- local->op_ret = -1;
- local->op_errno = afr_quorum_errno (priv);
+static int
+afr_ta_post_op_done(int ret, call_frame_t *frame, void *opaque)
+{
+ xlator_t *this = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *txn_frame = NULL;
+ afr_ta_fop_state_t fop_state;
+
+ local = (afr_local_t *)opaque;
+ fop_state = local->fop_state;
+ txn_frame = local->transaction.frame;
+ this = frame->this;
+
+ if (ret == 0) {
+ /*Mark pending xattrs on the up data brick.*/
+ afr_post_op_handle_success(txn_frame, this);
+ } else {
+ afr_post_op_handle_failure(txn_frame, this, -ret);
+ }
+
+ STACK_DESTROY(frame->root);
+ afr_ta_process_onwireq(fop_state, this);
+
+ return 0;
}
-int
-afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t * priv = this->private;
- int i = 0;
- int ret = 0;
- int idx = 0;
- afr_local_t * local = NULL;
- dict_t *xattr = NULL;
- int nothing_failed = 1;
- gf_boolean_t need_undirty = _gf_false;
-
- afr_handle_quorum (frame);
- local = frame->local;
- idx = afr_index_for_transaction_type (local->transaction.type);
-
- nothing_failed = afr_txn_nothing_failed (frame, this);
-
- if (afr_changelog_pre_op_uninherit (frame, this))
- need_undirty = _gf_false;
- else
- need_undirty = _gf_true;
-
- //If the fop fails on all the subvols then pending markers are placed
- //for every subvol on all subvolumes. Which is nothing but split-brain.
- //Avoid this by not doing post-op in case of failures.
- if (local->op_ret < 0) {
- afr_changelog_post_op_done (frame, this);
- goto out;
+int **
+afr_set_changelog_xattr(afr_private_t *priv, unsigned char *pending,
+ dict_t *xattr, afr_local_t *local)
+{
+ int **changelog = NULL;
+ int idx = 0;
+ int ret = 0;
+ int i;
+
+ if (local->is_new_entry == _gf_true) {
+ changelog = afr_mark_pending_changelog(priv, pending, xattr,
+ local->cont.dir_fop.buf.ia_type);
+ } else {
+ idx = afr_index_for_transaction_type(local->transaction.type);
+ changelog = afr_matrix_create(priv->child_count, AFR_NUM_CHANGE_LOGS);
+ if (!changelog) {
+ goto out;
+ }
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.failed_subvols[i])
+ changelog[i][idx] = hton32(1);
+ }
+ ret = afr_set_pending_dict(priv, xattr, changelog);
+ if (ret < 0) {
+ afr_matrix_cleanup(changelog, priv->child_count);
+ return NULL;
}
+ }
- if (nothing_failed && !need_undirty) {
- afr_changelog_post_op_done (frame, this);
- goto out;
- }
-
- xattr = dict_new ();
- if (!xattr) {
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- afr_changelog_post_op_done (frame, this);
- goto out;
- }
-
- if (need_undirty)
- local->dirty[idx] = hton32(-1);
- else
- local->dirty[idx] = hton32(0);
-
- ret = dict_set_static_bin (xattr, AFR_DIRTY, local->dirty,
- sizeof(int) * AFR_NUM_CHANGE_LOGS);
- if (ret) {
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- afr_changelog_post_op_done (frame, this);
- goto out;
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.failed_subvols[i])
- local->pending[i][idx] = hton32(1);
- }
-
- ret = afr_set_pending_dict (priv, xattr, local->pending);
- if (ret < 0) {
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- afr_changelog_post_op_done (frame, this);
- goto out;
- }
-
- afr_changelog_do (frame, this, xattr, afr_changelog_post_op_done);
out:
- if (xattr)
- dict_unref (xattr);
-
- return 0;
+ return changelog;
}
+static void
+afr_ta_locked_xattrop_validate(afr_private_t *priv, afr_local_t *local,
+ gf_boolean_t *valid)
+{
+ if (priv->ta_event_gen > local->ta_event_gen) {
+ /* We can't trust the ta's response anymore.*/
+ afr_ta_locked_priv_invalidate(priv);
+ *valid = _gf_false;
+ return;
+ }
+ return;
+}
-gf_boolean_t
-afr_changelog_pre_op_uninherit (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- fd_t *fd = NULL;
- int i = 0;
- gf_boolean_t ret = _gf_false;
- afr_fd_ctx_t *fd_ctx = NULL;
- int type = 0;
-
- local = frame->local;
- priv = this->private;
- fd = local->fd;
-
- type = afr_index_for_transaction_type (local->transaction.type);
- if (type != AFR_DATA_TRANSACTION)
- return !local->transaction.dirtied;
-
- if (!fd)
- return !local->transaction.dirtied;
-
- fd_ctx = afr_fd_ctx_get (fd, this);
- if (!fd_ctx)
- return _gf_false;
-
- if (local->transaction.no_uninherit)
- return _gf_false;
-
- /* This function must be idempotent. So check if we
- were called before and return the same answer again.
-
- It is important to keep this function idempotent for
- the call in afr_changelog_post_op_safe() to not have
- side effects on the call from afr_changelog_post_op_now()
- */
- if (local->transaction.uninherit_done)
- return local->transaction.uninherit_value;
-
- LOCK(&fd->lock);
- {
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i] !=
- fd_ctx->pre_op_done[type][i]) {
- ret = !local->transaction.dirtied;
- goto unlock;
- }
- }
-
- if (fd_ctx->inherited[type]) {
- ret = _gf_true;
- fd_ctx->inherited[type]--;
- } else if (fd_ctx->on_disk[type]) {
- ret = _gf_false;
- fd_ctx->on_disk[type]--;
- } else {
- /* ASSERT */
- ret = _gf_false;
- }
-
- if (!fd_ctx->inherited[type] && !fd_ctx->on_disk[type]) {
- for (i = 0; i < priv->child_count; i++)
- fd_ctx->pre_op_done[type][i] = 0;
- }
- }
-unlock:
- UNLOCK(&fd->lock);
+static int
+afr_ta_post_op_do(void *opaque)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ xlator_t *this = NULL;
+ dict_t *xattr = NULL;
+ unsigned char *pending = NULL;
+ int **changelog = NULL;
+ int failed_subvol = -1;
+ int success_subvol = -1;
+ loc_t loc = {
+ 0,
+ };
+ int i = 0;
+ int ret = 0;
+ gf_boolean_t valid = _gf_true;
+
+ local = (afr_local_t *)opaque;
+ this = local->transaction.frame->this;
+ priv = this->private;
+
+ ret = afr_fill_ta_loc(this, &loc, _gf_true);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Failed to populate loc for thin-arbiter.");
+ goto out;
+ }
+
+ xattr = dict_new();
+ if (!xattr) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ pending = alloca0(priv->child_count);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.failed_subvols[i]) {
+ pending[i] = 1;
+ failed_subvol = i;
+ } else {
+ success_subvol = i;
+ }
+ }
+
+ changelog = afr_set_changelog_xattr(priv, pending, xattr, local);
+
+ if (!changelog) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = afr_ta_post_op_lock(this, &loc);
+ if (ret)
+ goto out;
+
+ ret = syncop_xattrop(priv->children[THIN_ARBITER_BRICK_INDEX], &loc,
+ GF_XATTROP_ADD_ARRAY, xattr, NULL, NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Post-op on thin-arbiter id file %s failed for gfid %s.",
+ priv->pending_key[THIN_ARBITER_BRICK_INDEX],
+ uuid_utoa(local->inode->gfid));
+ }
+ LOCK(&priv->lock);
+ {
+ if (ret == 0) {
+ priv->ta_bad_child_index = failed_subvol;
+ } else if (ret == -EINVAL) {
+ priv->ta_bad_child_index = success_subvol;
+ ret = -EIO; /* TA failed the fop. Return EIO to application. */
+ }
- local->transaction.uninherit_done = _gf_true;
- local->transaction.uninherit_value = ret;
+ afr_ta_locked_xattrop_validate(priv, local, &valid);
+ }
+ UNLOCK(&priv->lock);
+ if (valid == _gf_false) {
+ gf_msg(this->name, GF_LOG_ERROR, EIO, AFR_MSG_THIN_ARB,
+ "Post-op on thin-arbiter id file %s for gfid %s invalidated due "
+ "to event-gen mismatch.",
+ priv->pending_key[THIN_ARBITER_BRICK_INDEX],
+ uuid_utoa(local->inode->gfid));
+ ret = -EIO;
+ }
+
+ afr_ta_post_op_unlock(this, &loc);
+out:
+ if (xattr)
+ dict_unref(xattr);
- return ret;
-}
+ if (changelog)
+ afr_matrix_cleanup(changelog, priv->child_count);
+ loc_wipe(&loc);
-gf_boolean_t
-afr_changelog_pre_op_inherit (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- fd_t *fd = NULL;
- int i = 0;
- gf_boolean_t ret = _gf_false;
- afr_fd_ctx_t *fd_ctx = NULL;
- int type = 0;
-
- local = frame->local;
- priv = this->private;
- fd = local->fd;
-
- if (local->transaction.type != AFR_DATA_TRANSACTION)
- return _gf_false;
-
- type = afr_index_for_transaction_type (local->transaction.type);
-
- if (!fd)
- return _gf_false;
-
- fd_ctx = afr_fd_ctx_get (fd, this);
- if (!fd_ctx)
- return _gf_false;
-
- LOCK(&fd->lock);
- {
- if (!fd_ctx->on_disk[type]) {
- /* nothing to inherit yet */
- ret = _gf_false;
- goto unlock;
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i] !=
- fd_ctx->pre_op_done[type][i]) {
- /* either inherit exactly, or don't */
- ret = _gf_false;
- goto unlock;
- }
- }
-
- fd_ctx->inherited[type]++;
-
- ret = _gf_true;
-
- local->transaction.inherited = _gf_true;
- }
-unlock:
- UNLOCK(&fd->lock);
+ return ret;
+}
- return ret;
+static int
+afr_ta_post_op_synctask(xlator_t *this, afr_local_t *local)
+{
+ call_frame_t *ta_frame = NULL;
+ int ret = 0;
+
+ ta_frame = afr_ta_frame_create(this);
+ if (!ta_frame) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_THIN_ARB,
+ "Failed to create ta_frame");
+ goto err;
+ }
+ ret = synctask_new(this->ctx->env, afr_ta_post_op_do, afr_ta_post_op_done,
+ ta_frame, local);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_THIN_ARB,
+ "Failed to launch post-op on thin arbiter for gfid %s",
+ uuid_utoa(local->inode->gfid));
+ STACK_DESTROY(ta_frame->root);
+ goto err;
+ }
+
+ return ret;
+err:
+ afr_changelog_post_op_fail(local->transaction.frame, this, ENOMEM);
+ return ret;
}
+static void
+afr_ta_set_fop_state(afr_private_t *priv, afr_local_t *local,
+ int *on_wire_count)
+{
+ LOCK(&priv->lock);
+ {
+ if (priv->release_ta_notify_dom_lock == _gf_true) {
+ /* Put the fop in waitq until notify dom lock is released.*/
+ local->fop_state = TA_WAIT_FOR_NOTIFY_LOCK_REL;
+ list_add_tail(&local->ta_waitq, &priv->ta_waitq);
+ } else if (priv->ta_bad_child_index == AFR_CHILD_UNKNOWN) {
+ /* Post-op on thin-arbiter to decide success/failure. */
+ local->fop_state = TA_GET_INFO_FROM_TA_FILE;
+ *on_wire_count = ++priv->ta_on_wire_txn_count;
+ if (*on_wire_count > 1) {
+ /*Avoid sending multiple on-wire post-ops on TA*/
+ list_add_tail(&local->ta_onwireq, &priv->ta_onwireq);
+ }
+ } else if (local->ta_failed_subvol == priv->ta_bad_child_index) {
+ /* Post-op on TA not needed as the fop failed on the in-memory bad
+ * brick. Just mark pending xattrs on the good data brick.*/
+ local->fop_state = TA_INFO_IN_MEMORY_SUCCESS;
+ priv->ta_in_mem_txn_count++;
+ } else {
+ /* Post-op on TA not needed as the fop succeeded only on the
+ * in-memory bad data brick and not the good one. Fail the fop.*/
+ local->fop_state = TA_INFO_IN_MEMORY_FAILED;
+ priv->ta_in_mem_txn_count++;
+ }
+ }
+ UNLOCK(&priv->lock);
+}
-gf_boolean_t
-afr_changelog_pre_op_update (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- fd_t *fd = NULL;
- afr_fd_ctx_t *fd_ctx = NULL;
- int i = 0;
- gf_boolean_t ret = _gf_false;
- int type = 0;
-
- local = frame->local;
- priv = this->private;
- fd = local->fd;
-
- if (!fd)
- return _gf_false;
-
- fd_ctx = afr_fd_ctx_get (fd, this);
- if (!fd_ctx)
- return _gf_false;
-
- if (local->transaction.inherited)
- /* was already inherited in afr_changelog_pre_op */
- return _gf_false;
-
- if (!local->transaction.dirtied)
- return _gf_false;
-
- if (!afr_txn_nothing_failed (frame, this))
- return _gf_false;
-
- type = afr_index_for_transaction_type (local->transaction.type);
-
- ret = _gf_false;
-
- LOCK(&fd->lock);
- {
- if (!fd_ctx->on_disk[type]) {
- for (i = 0; i < priv->child_count; i++)
- fd_ctx->pre_op_done[type][i] =
- local->transaction.pre_op[i];
- } else {
- for (i = 0; i < priv->child_count; i++)
- if (fd_ctx->pre_op_done[type][i] !=
- local->transaction.pre_op[i]) {
- local->transaction.no_uninherit = 1;
- goto unlock;
- }
- }
- fd_ctx->on_disk[type]++;
-
- ret = _gf_true;
- }
-unlock:
- UNLOCK(&fd->lock);
+static void
+afr_ta_fill_failed_subvol(afr_private_t *priv, afr_local_t *local)
+{
+ int i = 0;
- return ret;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.failed_subvols[i]) {
+ local->ta_failed_subvol = i;
+ break;
+ }
+ }
}
-
-int
-afr_changelog_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
+static void
+afr_post_op_handle_success(call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- int call_count = -1;
+ afr_local_t *local = NULL;
- local = frame->local;
+ local = frame->local;
+ if (local->is_new_entry == _gf_true) {
+ afr_mark_new_entry_changelog(frame, this);
+ }
+ afr_changelog_post_op_do(frame, this);
- if (op_ret == -1)
- afr_transaction_fop_failed (frame, this, (long) cookie);
+ return;
+}
- call_count = afr_frame_return (frame);
+static void
+afr_post_op_handle_failure(call_frame_t *frame, xlator_t *this, int op_errno)
+{
+ afr_changelog_post_op_fail(frame, this, op_errno);
- if (call_count == 0)
- local->transaction.changelog_resume (frame, this);
+ return;
+}
- return 0;
+static void
+afr_ta_decide_post_op_state(call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int on_wire_count = 0;
+
+ priv = this->private;
+ local = frame->local;
+
+ afr_ta_set_fop_state(priv, local, &on_wire_count);
+
+ switch (local->fop_state) {
+ case TA_GET_INFO_FROM_TA_FILE:
+ if (on_wire_count == 1)
+ afr_ta_post_op_synctask(this, local);
+ /*else, fop is queued in ta_onwireq.*/
+ break;
+ case TA_WAIT_FOR_NOTIFY_LOCK_REL:
+ /*Post releasing the notify lock, we will act on this queue*/
+ break;
+ case TA_INFO_IN_MEMORY_SUCCESS:
+ afr_post_op_handle_success(frame, this);
+ break;
+ case TA_INFO_IN_MEMORY_FAILED:
+ afr_post_op_handle_failure(frame, this, EIO);
+ break;
+ default:
+ break;
+ }
+ return;
}
+static void
+afr_handle_failure_using_thin_arbiter(call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t *priv = this->private;
+ afr_local_t *local = frame->local;
+
+ afr_ta_fill_failed_subvol(priv, local);
+ gf_msg_debug(this->name, 0,
+ "Fop failed on data brick (%s) for gfid=%s. "
+ "ta info needed to decide fop result.",
+ priv->children[local->ta_failed_subvol]->name,
+ uuid_utoa(local->inode->gfid));
+ afr_ta_decide_post_op_state(frame, this);
+}
-int
-afr_changelog_do (call_frame_t *frame, xlator_t *this, dict_t *xattr,
- afr_changelog_resume_t changelog_resume)
+void
+afr_changelog_post_op_do(call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- int call_count = 0;
+ afr_private_t *priv = this->private;
+ afr_local_t *local = NULL;
+ dict_t *xattr = NULL;
+ int i = 0;
+ int ret = 0;
+ int idx = 0;
+ int nothing_failed = 1;
+ gf_boolean_t need_undirty = _gf_false;
+
+ afr_handle_quorum(frame, this);
+ local = frame->local;
+ idx = afr_index_for_transaction_type(local->transaction.type);
+
+ xattr = dict_new();
+ if (!xattr) {
+ afr_changelog_post_op_fail(frame, this, ENOMEM);
+ goto out;
+ }
+
+ nothing_failed = afr_txn_nothing_failed(frame, this);
+
+ if (afr_changelog_pre_op_uninherit(frame, this))
+ need_undirty = _gf_false;
+ else
+ need_undirty = _gf_true;
+
+ if (local->op_ret < 0 && !nothing_failed) {
+ if (afr_need_dirty_marking(frame, this)) {
+ local->dirty[idx] = hton32(1);
+ goto set_dirty;
+ }
+
+ afr_changelog_post_op_done(frame, this);
+ goto out;
+ }
+
+ if (nothing_failed && !need_undirty) {
+ afr_changelog_post_op_done(frame, this);
+ goto out;
+ }
+
+ if (local->transaction.in_flight_sb) {
+ afr_changelog_post_op_fail(frame, this,
+ local->transaction.in_flight_sb_errno);
+ goto out;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.failed_subvols[i])
+ local->pending[i][idx] = hton32(1);
+ }
+
+ ret = afr_set_pending_dict(priv, xattr, local->pending);
+ if (ret < 0) {
+ afr_changelog_post_op_fail(frame, this, ENOMEM);
+ goto out;
+ }
+
+ if (need_undirty)
+ local->dirty[idx] = hton32(-1);
+ else
+ local->dirty[idx] = hton32(0);
+
+set_dirty:
+ ret = dict_set_static_bin(xattr, AFR_DIRTY, local->dirty,
+ sizeof(int) * AFR_NUM_CHANGE_LOGS);
+ if (ret) {
+ afr_changelog_post_op_fail(frame, this, ENOMEM);
+ goto out;
+ }
+
+ afr_changelog_do(frame, this, xattr, afr_changelog_post_op_done,
+ AFR_TRANSACTION_POST_OP);
+out:
+ if (xattr)
+ dict_unref(xattr);
+
+ return;
+}
- local = frame->local;
- priv = this->private;
+static int
+afr_changelog_post_op_now(call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int failed_count = 0;
+
+ priv = this->private;
+ local = frame->local;
+
+ if (priv->thin_arbiter_count) {
+ failed_count = AFR_COUNT(local->transaction.failed_subvols,
+ priv->child_count);
+ if (failed_count == 1) {
+ afr_handle_failure_using_thin_arbiter(frame, this);
+ return 0;
+ } else {
+ /* Txn either succeeded or failed on both data bricks. Let
+ * post_op_do handle it as the case might be. */
+ }
+ }
- call_count = afr_changelog_call_count (local->transaction.type,
- local->transaction.pre_op,
- priv->child_count);
+ afr_changelog_post_op_do(frame, this);
+ return 0;
+}
- if (call_count == 0) {
- changelog_resume (frame, this);
- return 0;
- }
+gf_boolean_t
+afr_changelog_pre_op_uninherit(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_inode_ctx_t *ctx = NULL;
+ int i = 0;
+ gf_boolean_t ret = _gf_false;
+ int type = 0;
+
+ local = frame->local;
+ priv = this->private;
+ ctx = local->inode_ctx;
+
+ type = afr_index_for_transaction_type(local->transaction.type);
+ if (type != AFR_DATA_TRANSACTION)
+ return !local->transaction.dirtied;
+
+ if (local->transaction.no_uninherit)
+ return _gf_false;
- local->call_count = call_count;
+ /* This function must be idempotent. So check if we
+ were called before and return the same answer again.
- local->transaction.changelog_resume = changelog_resume;
+ It is important to keep this function idempotent for
+ the call in afr_changelog_post_op_safe() to not have
+ side effects on the call from afr_changelog_post_op_now()
+ */
+ if (local->transaction.uninherit_done)
+ return local->transaction.uninherit_value;
+ LOCK(&local->inode->lock);
+ {
for (i = 0; i < priv->child_count; i++) {
- if (!local->transaction.pre_op[i])
- continue;
-
- switch (local->transaction.type) {
- case AFR_DATA_TRANSACTION:
- case AFR_METADATA_TRANSACTION:
- if (!local->fd) {
- STACK_WIND_COOKIE (frame, afr_changelog_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->loc,
- GF_XATTROP_ADD_ARRAY, xattr,
- NULL);
- } else {
- STACK_WIND_COOKIE (frame, afr_changelog_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fxattrop,
- local->fd,
- GF_XATTROP_ADD_ARRAY, xattr,
- NULL);
- }
- break;
- case AFR_ENTRY_RENAME_TRANSACTION:
-
- STACK_WIND_COOKIE (frame, afr_changelog_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->transaction.new_parent_loc,
- GF_XATTROP_ADD_ARRAY, xattr,
- NULL);
- call_count--;
+ if (local->transaction.pre_op[i] != ctx->pre_op_done[type][i]) {
+ ret = !local->transaction.dirtied;
+ goto unlock;
+ }
+ }
- /* fall through */
+ if (ctx->inherited[type]) {
+ ret = _gf_true;
+ ctx->inherited[type]--;
+ } else if (ctx->on_disk[type]) {
+ ret = _gf_false;
+ ctx->on_disk[type]--;
+ } else {
+ /* ASSERT */
+ ret = _gf_false;
+ }
- case AFR_ENTRY_TRANSACTION:
- if (local->fd)
- STACK_WIND_COOKIE (frame, afr_changelog_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fxattrop,
- local->fd,
- GF_XATTROP_ADD_ARRAY, xattr,
- NULL);
- else
- STACK_WIND_COOKIE (frame, afr_changelog_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->transaction.parent_loc,
- GF_XATTROP_ADD_ARRAY, xattr,
- NULL);
- break;
- }
-
- if (!--call_count)
- break;
+ if (!ctx->inherited[type] && !ctx->on_disk[type]) {
+ for (i = 0; i < priv->child_count; i++)
+ ctx->pre_op_done[type][i] = 0;
}
+ }
+unlock:
+ UNLOCK(&local->inode->lock);
- return 0;
+ local->transaction.uninherit_done = _gf_true;
+ local->transaction.uninherit_value = ret;
+
+ return ret;
}
+gf_boolean_t
+afr_changelog_pre_op_inherit(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ gf_boolean_t ret = _gf_false;
+ int type = 0;
+
+ local = frame->local;
+ priv = this->private;
-int
-afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t * priv = this->private;
- int i = 0;
- int ret = 0;
- int call_count = 0;
- int op_errno = 0;
- afr_local_t *local = NULL;
- afr_internal_lock_t *int_lock = NULL;
- unsigned char *locked_nodes = NULL;
- unsigned char *pending_subvols = NULL;
- int idx = -1;
- gf_boolean_t pre_nop = _gf_true;
- dict_t *xdata_req = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
- idx = afr_index_for_transaction_type (local->transaction.type);
-
- locked_nodes = afr_locked_nodes_get (local->transaction.type, int_lock);
-
- pending_subvols = alloca0 (priv->child_count);
-
- for (i = 0; i < priv->child_count; i++) {
- if (locked_nodes[i]) {
- local->transaction.pre_op[i] = 1;
- call_count++;
- } else {
- pending_subvols[i] = 1;
- }
- }
-
- /* This condition should not be met with present code, as
- * transaction.done will be called if locks are not acquired on even a
- * single node.
- */
- if (call_count == 0) {
- op_errno = ENOTCONN;
- goto err;
- }
+ if (local->transaction.type != AFR_DATA_TRANSACTION)
+ return _gf_false;
- /* Check if the fop can be performed on at least
- * quorum number of nodes.
- */
- if (priv->quorum_count && !afr_has_fop_quorum (frame)) {
- op_errno = afr_quorum_errno (priv);
- goto err;
- }
+ type = afr_index_for_transaction_type(local->transaction.type);
- xdata_req = dict_new();
- if (!xdata_req) {
- op_errno = ENOMEM;
- goto err;
- }
-
- pre_nop = _gf_true;
-
- if (afr_changelog_pre_op_inherit (frame, this))
- goto next;
-
- if (call_count < priv->child_count) {
- /* For subvols we are not performing operation on,
- mark them as pending up-front along with the FOP
- so that we can safely defer unmarking dirty until
- later.
- */
- for (i = 0; i < priv->child_count; i++) {
- if (pending_subvols[i])
- local->pending[i][idx] = hton32(1);
- }
- ret = afr_set_pending_dict (priv, xdata_req,
- local->pending);
- if (ret < 0) {
- op_errno = ENOMEM;
- goto err;
- }
- pre_nop = _gf_false;
- }
-
- if (call_count > 1 &&
- (local->transaction.type == AFR_DATA_TRANSACTION ||
- !local->optimistic_change_log)) {
-
- /* If we are performing change on only one subvol, no
- need to mark dirty, because we are setting the pending
- counts already anyways
- */
- local->dirty[idx] = hton32(1);
-
- ret = dict_set_static_bin (xdata_req, AFR_DIRTY, local->dirty,
- sizeof(int) * AFR_NUM_CHANGE_LOGS);
- if (ret) {
- op_errno = ENOMEM;
- goto err;
- }
-
- pre_nop = _gf_false;
- local->transaction.dirtied = 1;
- }
-
- if (pre_nop)
- goto next;
-
- if (!local->pre_op_compat) {
- dict_copy (xdata_req, local->xdata_req);
- goto next;
- }
-
- afr_changelog_do (frame, this, xdata_req, afr_transaction_perform_fop);
-
- if (xdata_req)
- dict_unref (xdata_req);
-
- return 0;
-next:
- afr_transaction_perform_fop (frame, this);
+ LOCK(&local->inode->lock);
+ {
+ if (!local->inode_ctx->on_disk[type]) {
+ /* nothing to inherit yet */
+ ret = _gf_false;
+ goto unlock;
+ }
- if (xdata_req)
- dict_unref (xdata_req);
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i] !=
+ local->inode_ctx->pre_op_done[type][i]) {
+ /* either inherit exactly, or don't */
+ ret = _gf_false;
+ goto unlock;
+ }
+ }
- return 0;
-err:
- local->internal_lock.lock_cbk = local->transaction.done;
- local->op_ret = -1;
- local->op_errno = op_errno;
+ local->inode_ctx->inherited[type]++;
- afr_unlock (frame, this);
+ ret = _gf_true;
- if (xdata_req)
- dict_unref (xdata_req);
+ local->transaction.inherited = _gf_true;
+ }
+unlock:
+ UNLOCK(&local->inode->lock);
- return 0;
+ return ret;
}
-
-int
-afr_post_blocking_inodelk_cbk (call_frame_t *frame, xlator_t *this)
+gf_boolean_t
+afr_changelog_pre_op_update(call_frame_t *frame, xlator_t *this)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ gf_boolean_t ret = _gf_false;
+ int type = 0;
- local = frame->local;
- int_lock = &local->internal_lock;
+ local = frame->local;
+ priv = this->private;
- if (int_lock->lock_op_ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "Blocking inodelks failed.");
- local->transaction.done (frame, this);
- } else {
+ if (local->transaction.type == AFR_ENTRY_TRANSACTION ||
+ local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION)
+ return _gf_false;
- gf_log (this->name, GF_LOG_DEBUG,
- "Blocking inodelks done. Proceeding to FOP");
- afr_internal_lock_finish (frame, this);
- }
+ if (local->transaction.inherited)
+ /* was already inherited in afr_changelog_pre_op */
+ return _gf_false;
- return 0;
-}
+ if (!local->transaction.dirtied)
+ return _gf_false;
+ if (!afr_txn_nothing_failed(frame, this))
+ return _gf_false;
-int
-afr_post_nonblocking_inodelk_cbk (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
+ type = afr_index_for_transaction_type(local->transaction.type);
- local = frame->local;
- int_lock = &local->internal_lock;
+ ret = _gf_false;
- /* Initiate blocking locks if non-blocking has failed */
- if (int_lock->lock_op_ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Non blocking inodelks failed. Proceeding to blocking");
- int_lock->lock_cbk = afr_post_blocking_inodelk_cbk;
- afr_blocking_lock (frame, this);
+ LOCK(&local->inode->lock);
+ {
+ if (!local->inode_ctx->on_disk[type]) {
+ for (i = 0; i < priv->child_count; i++)
+ local->inode_ctx->pre_op_done[type][i] =
+ (!local->transaction.failed_subvols[i]);
} else {
-
- gf_log (this->name, GF_LOG_DEBUG,
- "Non blocking inodelks done. Proceeding to FOP");
- afr_internal_lock_finish (frame, this);
+ for (i = 0; i < priv->child_count; i++)
+ if (local->inode_ctx->pre_op_done[type][i] !=
+ (!local->transaction.failed_subvols[i])) {
+ local->transaction.no_uninherit = 1;
+ goto unlock;
+ }
}
+ local->inode_ctx->on_disk[type]++;
- return 0;
-}
+ ret = _gf_true;
+ }
+unlock:
+ UNLOCK(&local->inode->lock);
+ return ret;
+}
int
-afr_post_blocking_entrylk_cbk (call_frame_t *frame, xlator_t *this)
+afr_changelog_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, dict_t *xattr, dict_t *xdata)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
+ afr_local_t *local = NULL;
+ int call_count = -1;
+ int child_index = -1;
- local = frame->local;
- int_lock = &local->internal_lock;
+ local = frame->local;
+ child_index = (long)cookie;
- if (int_lock->lock_op_ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "Blocking entrylks failed.");
- local->transaction.done (frame, this);
- } else {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ afr_transaction_fop_failed(frame, this, child_index);
+ }
- gf_log (this->name, GF_LOG_DEBUG,
- "Blocking entrylks done. Proceeding to FOP");
- afr_internal_lock_finish (frame, this);
- }
-
- return 0;
-}
+ if (xattr)
+ local->transaction.changelog_xdata[child_index] = dict_ref(xattr);
+ call_count = afr_frame_return(frame);
-int
-afr_post_nonblocking_entrylk_cbk (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
+ if (call_count == 0) {
+ local->transaction.changelog_resume(frame, this);
+ }
- local = frame->local;
- int_lock = &local->internal_lock;
+ return 0;
+}
- /* Initiate blocking locks if non-blocking has failed */
- if (int_lock->lock_op_ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Non blocking entrylks failed. Proceeding to blocking");
- int_lock->lock_cbk = afr_post_blocking_entrylk_cbk;
- afr_blocking_lock (frame, this);
- } else {
+void
+afr_changelog_populate_xdata(call_frame_t *frame, afr_xattrop_type_t op,
+ dict_t **xdata, dict_t **newloc_xdata)
+{
+ int i = 0;
+ int ret = 0;
+ char *key = NULL;
+ int keylen = 0;
+ const char *name = NULL;
+ dict_t *xdata1 = NULL;
+ dict_t *xdata2 = NULL;
+ xlator_t *this = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ gf_boolean_t need_entry_key_set = _gf_true;
+
+ local = frame->local;
+ this = THIS;
+ priv = this->private;
+
+ if (local->transaction.type == AFR_DATA_TRANSACTION ||
+ local->transaction.type == AFR_METADATA_TRANSACTION)
+ goto out;
+
+ if (!priv->esh_granular)
+ goto out;
+
+ xdata1 = dict_new();
+ if (!xdata1)
+ goto out;
+
+ name = local->loc.name;
+ if (local->op == GF_FOP_LINK)
+ name = local->newloc.name;
+
+ switch (op) {
+ case AFR_TRANSACTION_PRE_OP:
+ key = GF_XATTROP_ENTRY_IN_KEY;
+ break;
+ case AFR_TRANSACTION_POST_OP:
+ if (afr_txn_nothing_failed(frame, this)) {
+ key = GF_XATTROP_ENTRY_OUT_KEY;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->transaction.failed_subvols[i])
+ continue;
+ need_entry_key_set = _gf_false;
+ break;
+ }
+ /* If the transaction itself did not fail and there
+ * are no failed subvolumes, check whether the fop
+ * failed due to a symmetric error. If it did, do
+ * not set the ENTRY_OUT xattr which would end up
+ * deleting a name index which was created possibly by
+ * an earlier entry txn that may have failed on some
+ * of the sub-volumes.
+ */
+ if (local->op_ret)
+ need_entry_key_set = _gf_false;
+ } else {
+ key = GF_XATTROP_ENTRY_IN_KEY;
+ }
+ break;
+ }
+
+ if (need_entry_key_set) {
+ keylen = strlen(key);
+ ret = dict_set_strn(xdata1, key, keylen, (char *)name);
+ if (ret)
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, AFR_MSG_DICT_SET_FAILED,
+ "%s/%s: Could not set %s key during xattrop",
+ uuid_utoa(local->loc.pargfid), local->loc.name, key);
+ if (local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION) {
+ xdata2 = dict_new();
+ if (!xdata2)
+ goto out;
- gf_log (this->name, GF_LOG_DEBUG,
- "Non blocking entrylks done. Proceeding to FOP");
- afr_internal_lock_finish (frame, this);
+ ret = dict_set_strn(xdata2, key, keylen,
+ (char *)local->newloc.name);
+ if (ret)
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, AFR_MSG_DICT_SET_FAILED,
+ "%s/%s: Could not set %s key during "
+ "xattrop",
+ uuid_utoa(local->newloc.pargfid), local->newloc.name,
+ key);
}
+ }
- return 0;
+ *xdata = xdata1;
+ *newloc_xdata = xdata2;
+ xdata1 = xdata2 = NULL;
+out:
+ if (xdata1)
+ dict_unref(xdata1);
+ return;
}
-
int
-afr_post_blocking_rename_cbk (call_frame_t *frame, xlator_t *this)
+afr_changelog_prepare(xlator_t *this, call_frame_t *frame, int *call_count,
+ afr_changelog_resume_t changelog_resume,
+ afr_xattrop_type_t op, dict_t **xdata,
+ dict_t **newloc_xdata)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
- local = frame->local;
- int_lock = &local->internal_lock;
+ local = frame->local;
+ priv = this->private;
- if (int_lock->lock_op_ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "Blocking entrylks failed.");
- local->transaction.done (frame, this);
- } else {
+ *call_count = afr_changelog_call_count(
+ local->transaction.type, local->transaction.pre_op,
+ local->transaction.failed_subvols, priv->child_count);
- gf_log (this->name, GF_LOG_DEBUG,
- "Blocking entrylks done. Proceeding to FOP");
- afr_internal_lock_finish (frame, this);
- }
- return 0;
+ if (*call_count == 0) {
+ changelog_resume(frame, this);
+ return -1;
+ }
+
+ afr_changelog_populate_xdata(frame, op, xdata, newloc_xdata);
+ local->call_count = *call_count;
+
+ local->transaction.changelog_resume = changelog_resume;
+ return 0;
}
int
-afr_post_lower_unlock_cbk (call_frame_t *frame, xlator_t *this)
+afr_changelog_do(call_frame_t *frame, xlator_t *this, dict_t *xattr,
+ afr_changelog_resume_t changelog_resume, afr_xattrop_type_t op)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ dict_t *xdata = NULL;
+ dict_t *newloc_xdata = NULL;
+ int i = 0;
+ int call_count = 0;
+ int ret = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.changelog_xdata[i]) {
+ dict_unref(local->transaction.changelog_xdata[i]);
+ local->transaction.changelog_xdata[i] = NULL;
+ }
+ }
- local = frame->local;
- int_lock = &local->internal_lock;
+ ret = afr_changelog_prepare(this, frame, &call_count, changelog_resume, op,
+ &xdata, &newloc_xdata);
- GF_ASSERT (!int_lock->higher_locked);
+ if (ret)
+ return 0;
- int_lock->lock_cbk = afr_post_blocking_rename_cbk;
- afr_blocking_lock (frame, this);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->transaction.pre_op[i] ||
+ local->transaction.failed_subvols[i])
+ continue;
- return 0;
-}
+ switch (local->transaction.type) {
+ case AFR_DATA_TRANSACTION:
+ case AFR_METADATA_TRANSACTION:
+ if (!local->fd) {
+ STACK_WIND_COOKIE(
+ frame, afr_changelog_cbk, (void *)(long)i,
+ priv->children[i], priv->children[i]->fops->xattrop,
+ &local->loc, GF_XATTROP_ADD_ARRAY, xattr, xdata);
+ } else {
+ STACK_WIND_COOKIE(
+ frame, afr_changelog_cbk, (void *)(long)i,
+ priv->children[i], priv->children[i]->fops->fxattrop,
+ local->fd, GF_XATTROP_ADD_ARRAY, xattr, xdata);
+ }
+ break;
+ case AFR_ENTRY_RENAME_TRANSACTION:
+ STACK_WIND_COOKIE(frame, afr_changelog_cbk, (void *)(long)i,
+ priv->children[i],
+ priv->children[i]->fops->xattrop,
+ &local->transaction.new_parent_loc,
+ GF_XATTROP_ADD_ARRAY, xattr, newloc_xdata);
+ call_count--;
-int
-afr_set_transaction_flock (afr_local_t *local)
+ /* fall through */
+
+ case AFR_ENTRY_TRANSACTION:
+ if (local->fd)
+ STACK_WIND_COOKIE(
+ frame, afr_changelog_cbk, (void *)(long)i,
+ priv->children[i], priv->children[i]->fops->fxattrop,
+ local->fd, GF_XATTROP_ADD_ARRAY, xattr, xdata);
+ else
+ STACK_WIND_COOKIE(frame, afr_changelog_cbk, (void *)(long)i,
+ priv->children[i],
+ priv->children[i]->fops->xattrop,
+ &local->transaction.parent_loc,
+ GF_XATTROP_ADD_ARRAY, xattr, xdata);
+ break;
+ }
+
+ if (!--call_count)
+ break;
+ }
+
+ if (xdata)
+ dict_unref(xdata);
+ if (newloc_xdata)
+ dict_unref(newloc_xdata);
+ return 0;
+}
+
+static void
+afr_init_optimistic_changelog_for_txn(xlator_t *this, afr_local_t *local)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_inodelk_t *inodelk = NULL;
+ int locked_count = 0;
+ afr_private_t *priv = NULL;
- int_lock = &local->internal_lock;
- inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+ priv = this->private;
- inodelk->flock.l_len = local->transaction.len;
- inodelk->flock.l_start = local->transaction.start;
- inodelk->flock.l_type = F_WRLCK;
+ locked_count = AFR_COUNT(local->transaction.pre_op, priv->child_count);
+ if (priv->optimistic_change_log && locked_count == priv->child_count)
+ local->optimistic_change_log = 1;
- return 0;
+ return;
}
int
-afr_lock_rec (call_frame_t *frame, xlator_t *this)
+afr_changelog_pre_op(call_frame_t *frame, xlator_t *this)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
+ afr_private_t *priv = this->private;
+ int i = 0;
+ int ret = 0;
+ int call_count = 0;
+ int op_errno = 0;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ unsigned char *locked_nodes = NULL;
+ int idx = -1;
+ gf_boolean_t pre_nop = _gf_true;
+ dict_t *xdata_req = NULL;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+ idx = afr_index_for_transaction_type(local->transaction.type);
+
+ locked_nodes = afr_locked_nodes_get(local->transaction.type, int_lock);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (locked_nodes[i]) {
+ local->transaction.pre_op[i] = 1;
+ call_count++;
+ } else {
+ local->transaction.failed_subvols[i] = 1;
+ }
+ }
+
+ afr_init_optimistic_changelog_for_txn(this, local);
+
+ if (afr_changelog_pre_op_inherit(frame, this))
+ goto next;
+
+ /* This condition should not be met with present code, as
+ * transaction.done will be called if locks are not acquired on even a
+ * single node.
+ */
+ if (call_count == 0) {
+ op_errno = ENOTCONN;
+ goto err;
+ }
+
+ /* Check if the fop can be performed on at least
+ * quorum number of nodes.
+ */
+ if (priv->quorum_count && !afr_has_fop_quorum(frame)) {
+ op_errno = int_lock->lock_op_errno;
+ if (op_errno == 0)
+ op_errno = afr_quorum_errno(priv);
+ goto err;
+ }
+
+ xdata_req = dict_new();
+ if (!xdata_req) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ if (call_count < priv->child_count)
+ pre_nop = _gf_false;
+
+ /* Set an all-zero pending changelog so that in the cbk, we can get the
+ * current on-disk values. In a replica 3 volume with arbiter enabled,
+ * these values are needed to arrive at a go/ no-go of the fop phase to
+ * avoid ending up in split-brain.*/
+
+ ret = afr_set_pending_dict(priv, xdata_req, local->pending);
+ if (ret < 0) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ if (afr_needs_changelog_update(local)) {
+ local->dirty[idx] = hton32(1);
+
+ ret = dict_set_static_bin(xdata_req, AFR_DIRTY, local->dirty,
+ sizeof(int) * AFR_NUM_CHANGE_LOGS);
+ if (ret) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- local = frame->local;
- int_lock = &local->internal_lock;
+ pre_nop = _gf_false;
+ local->transaction.dirtied = 1;
+ }
- int_lock->transaction_lk_type = AFR_TRANSACTION_LK;
- int_lock->domain = this->name;
+ if (pre_nop)
+ goto next;
- switch (local->transaction.type) {
- case AFR_DATA_TRANSACTION:
- case AFR_METADATA_TRANSACTION:
- afr_set_transaction_flock (local);
+ if (!local->pre_op_compat) {
+ dict_copy(xdata_req, local->xdata_req);
+ goto next;
+ }
- int_lock->lock_cbk = afr_post_nonblocking_inodelk_cbk;
+ afr_changelog_do(frame, this, xdata_req, afr_transaction_perform_fop,
+ AFR_TRANSACTION_PRE_OP);
- afr_nonblocking_inodelk (frame, this);
- break;
+ if (xdata_req)
+ dict_unref(xdata_req);
- case AFR_ENTRY_RENAME_TRANSACTION:
+ return 0;
+next:
+ afr_transaction_perform_fop(frame, this);
- int_lock->lock_cbk = afr_post_nonblocking_entrylk_cbk;
- afr_nonblocking_entrylk (frame, this);
- break;
+ if (xdata_req)
+ dict_unref(xdata_req);
- case AFR_ENTRY_TRANSACTION:
- int_lock->lk_basename = local->transaction.basename;
- if (&local->transaction.parent_loc)
- int_lock->lk_loc = &local->transaction.parent_loc;
- else
- GF_ASSERT (local->fd);
+ return 0;
+err:
+ local->internal_lock.lock_cbk = afr_transaction_done;
+ local->op_ret = -1;
+ local->op_errno = op_errno;
- int_lock->lock_cbk = afr_post_nonblocking_entrylk_cbk;
- afr_nonblocking_entrylk (frame, this);
- break;
- }
+ afr_handle_lock_acquire_failure(local);
- return 0;
+ if (xdata_req)
+ dict_unref(xdata_req);
+
+ return 0;
}
+int
+afr_post_nonblocking_lock_cbk(call_frame_t *frame, xlator_t *this)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+
+ /* Initiate blocking locks if non-blocking has failed */
+ if (int_lock->lock_op_ret < 0) {
+ gf_msg_debug(this->name, 0,
+ "Non blocking locks failed. Proceeding to blocking");
+ int_lock->lock_cbk = afr_internal_lock_finish;
+ afr_blocking_lock(frame, this);
+ } else {
+ gf_msg_debug(this->name, 0,
+ "Non blocking locks done. Proceeding to FOP");
+
+ afr_internal_lock_finish(frame, this);
+ }
+
+ return 0;
+}
int
-afr_lock (call_frame_t *frame, xlator_t *this)
+afr_post_blocking_rename_cbk(call_frame_t *frame, xlator_t *this)
{
- afr_set_lock_number (frame, this);
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
- return afr_lock_rec (frame, this);
-}
+ local = frame->local;
+ int_lock = &local->internal_lock;
+ if (int_lock->lock_op_ret < 0) {
+ gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_INTERNAL_LKS_FAILED,
+ "Blocking entrylks failed.");
-/* }}} */
+ afr_transaction_done(frame, this);
+ } else {
+ gf_msg_debug(this->name, 0,
+ "Blocking entrylks done. Proceeding to FOP");
+
+ afr_internal_lock_finish(frame, this);
+ }
+ return 0;
+}
int
-afr_internal_lock_finish (call_frame_t *frame, xlator_t *this)
+afr_post_lower_unlock_cbk(call_frame_t *frame, xlator_t *this)
{
- if (__fop_changelog_needed (frame, this)) {
- afr_changelog_pre_op (frame, this);
- } else {
- afr_transaction_perform_fop (frame, this);
- }
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
- return 0;
-}
+ local = frame->local;
+ int_lock = &local->internal_lock;
+ GF_ASSERT(!int_lock->higher_locked);
-void
-afr_set_delayed_post_op (call_frame_t *frame, xlator_t *this)
+ int_lock->lock_cbk = afr_post_blocking_rename_cbk;
+ afr_blocking_lock(frame, this);
+
+ return 0;
+}
+
+int
+afr_set_transaction_flock(xlator_t *this, afr_local_t *local,
+ afr_lockee_t *lockee)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
+ afr_private_t *priv = NULL;
+ struct gf_flock *flock = NULL;
+
+ priv = this->private;
+ flock = &lockee->flock;
+
+ if ((priv->arbiter_count || local->transaction.eager_lock_on ||
+ priv->full_lock) &&
+ local->transaction.type == AFR_DATA_TRANSACTION) {
+ /*Lock entire file to avoid network split brains.*/
+ flock->l_len = 0;
+ flock->l_start = 0;
+ } else {
+ flock->l_len = local->transaction.len;
+ flock->l_start = local->transaction.start;
+ }
+ flock->l_type = F_WRLCK;
+
+ return 0;
+}
- /* call this function from any of the related optimizations
- which benefit from delaying post op are enabled, namely:
+int
+afr_lock(call_frame_t *frame, xlator_t *this)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ int i = 0;
- - changelog piggybacking
- - eager locking
- */
+ local = frame->local;
+ int_lock = &local->internal_lock;
- priv = this->private;
- if (!priv)
- return;
+ int_lock->lock_cbk = afr_post_nonblocking_lock_cbk;
+ int_lock->domain = this->name;
- if (!priv->post_op_delay_secs)
- return;
+ switch (local->transaction.type) {
+ case AFR_DATA_TRANSACTION:
+ case AFR_METADATA_TRANSACTION:
+ for (i = 0; i < int_lock->lockee_count; i++) {
+ afr_set_transaction_flock(this, local, &int_lock->lockee[i]);
+ }
- local = frame->local;
- if (!local)
- return;
+ break;
- if (!local->transaction.eager_lock_on)
- return;
+ case AFR_ENTRY_TRANSACTION:
+ int_lock->lk_basename = local->transaction.basename;
+ if (local->transaction.parent_loc.path)
+ int_lock->lk_loc = &local->transaction.parent_loc;
+ else
+ GF_ASSERT(local->fd);
+ break;
+ case AFR_ENTRY_RENAME_TRANSACTION:
+ break;
+ }
+ afr_lock_nonblocking(frame, this);
- if (!local->fd)
- return;
+ return 0;
+}
- if (local->op == GF_FOP_WRITE)
- local->delayed_post_op = _gf_true;
+static gf_boolean_t
+afr_locals_overlap(afr_local_t *local1, afr_local_t *local2)
+{
+ uint64_t start1 = local1->transaction.start;
+ uint64_t start2 = local2->transaction.start;
+ uint64_t end1 = 0;
+ uint64_t end2 = 0;
+
+ if (local1->transaction.len)
+ end1 = start1 + local1->transaction.len - 1;
+ else
+ end1 = ULLONG_MAX;
+
+ if (local2->transaction.len)
+ end2 = start2 + local2->transaction.len - 1;
+ else
+ end2 = ULLONG_MAX;
+
+ return ((end1 >= start2) && (end2 >= start1));
}
gf_boolean_t
-afr_are_multiple_fds_opened (fd_t *fd, xlator_t *this)
+afr_has_lock_conflict(afr_local_t *local, gf_boolean_t waitlist_check)
{
- afr_fd_ctx_t *fd_ctx = NULL;
-
- if (!fd) {
- /* If false is returned, it may keep on taking eager-lock
- * which may lead to starvation, so return true to avoid that.
- */
- gf_log_callingfn (this->name, GF_LOG_ERROR, "Invalid fd");
- return _gf_true;
+ afr_local_t *each = NULL;
+ afr_lock_t *lock = NULL;
+
+ lock = &local->inode_ctx->lock[local->transaction.type];
+ /*
+ * Once full file lock is acquired in eager-lock phase, overlapping
+ * writes do not compete for inode-locks, instead are transferred to the
+ * next writes. Because of this overlapping writes are not ordered.
+ * This can cause inconsistencies in replication.
+ * Example:
+ * Two overlapping writes w1, w2 are sent in parallel on same fd
+ * in two threads t1, t2.
+ * Both threads can execute afr_writev_wind in the following manner.
+ * t1 winds w1 on brick-0
+ * t2 winds w2 on brick-0
+ * t2 winds w2 on brick-1
+ * t1 winds w1 on brick-1
+ *
+ * This check makes sure the locks are not transferred for
+ * overlapping writes.
+ */
+ list_for_each_entry(each, &lock->owners, transaction.owner_list)
+ {
+ if (afr_locals_overlap(each, local)) {
+ return _gf_true;
}
- /* Lets say mount1 has eager-lock(full-lock) and after the eager-lock
- * is taken mount2 opened the same file, it won't be able to
- * perform any data operations until mount1 releases eager-lock.
- * To avoid such scenario do not enable eager-lock for this transaction
- * if open-fd-count is > 1
- */
-
- fd_ctx = afr_fd_ctx_get (fd, this);
- if (!fd_ctx)
- return _gf_true;
-
- if (fd_ctx->open_fd_count > 1)
- return _gf_true;
+ }
+ if (!waitlist_check)
return _gf_false;
+ list_for_each_entry(each, &lock->waiting, transaction.wait_list)
+ {
+ if (afr_locals_overlap(each, local)) {
+ return _gf_true;
+ }
+ }
+ return _gf_false;
}
+/* }}} */
+static void
+afr_copy_inodelk_vars(afr_internal_lock_t *dst, afr_internal_lock_t *src,
+ xlator_t *this, int lockee_num)
+{
+ afr_private_t *priv = this->private;
+ afr_lockee_t *sl = &src->lockee[lockee_num];
+ afr_lockee_t *dl = &dst->lockee[lockee_num];
+
+ dst->domain = src->domain;
+ dl->flock.l_len = sl->flock.l_len;
+ dl->flock.l_start = sl->flock.l_start;
+ dl->flock.l_type = sl->flock.l_type;
+ dl->locked_count = sl->locked_count;
+ memcpy(dl->locked_nodes, sl->locked_nodes,
+ priv->child_count * sizeof(*dl->locked_nodes));
+}
-gf_boolean_t
-is_afr_delayed_changelog_post_op_needed (call_frame_t *frame, xlator_t *this)
+void
+__afr_transaction_wake_shared(afr_local_t *local, struct list_head *shared)
{
- afr_local_t *local = NULL;
- gf_boolean_t res = _gf_false;
+ gf_boolean_t conflict = _gf_false;
+ afr_local_t *each = NULL;
+ afr_lock_t *lock = &local->inode_ctx->lock[local->transaction.type];
+
+ while (!conflict) {
+ if (list_empty(&lock->waiting))
+ return;
+ each = list_entry(lock->waiting.next, afr_local_t,
+ transaction.wait_list);
+ if (afr_has_lock_conflict(each, _gf_false)) {
+ conflict = _gf_true;
+ }
+ if (conflict && !list_empty(&lock->owners))
+ return;
+ afr_copy_inodelk_vars(&each->internal_lock, &local->internal_lock,
+ each->transaction.frame->this, 0);
+ list_move_tail(&each->transaction.wait_list, shared);
+ list_add_tail(&each->transaction.owner_list, &lock->owners);
+ }
+}
- local = frame->local;
- if (!local)
- goto out;
+static void
+afr_lock_resume_shared(struct list_head *list)
+{
+ afr_local_t *each = NULL;
+
+ while (!list_empty(list)) {
+ each = list_entry(list->next, afr_local_t, transaction.wait_list);
+ list_del_init(&each->transaction.wait_list);
+ afr_changelog_pre_op(each->transaction.frame,
+ each->transaction.frame->this);
+ }
+}
- if (!local->delayed_post_op)
- goto out;
+int
+afr_internal_lock_finish(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = frame->local;
+ afr_lock_t *lock = NULL;
+
+ local->internal_lock.lock_cbk = NULL;
+ if (!local->transaction.eager_lock_on) {
+ if (local->internal_lock.lock_op_ret < 0) {
+ afr_transaction_done(frame, this);
+ return 0;
+ }
+ afr_changelog_pre_op(frame, this);
+ } else {
+ lock = &local->inode_ctx->lock[local->transaction.type];
+ if (local->internal_lock.lock_op_ret < 0) {
+ afr_handle_lock_acquire_failure(local);
+ } else {
+ lock->event_generation = local->event_generation;
+ afr_changelog_pre_op(frame, this);
+ }
+ }
- //Mark pending changelog ASAP
- if (!afr_txn_nothing_failed (frame, this))
- goto out;
+ return 0;
+}
- if (local->fd && afr_are_multiple_fds_opened (local->fd, this))
- goto out;
+gf_boolean_t
+afr_are_conflicting_ops_waiting(afr_local_t *local, xlator_t *this)
+{
+ afr_lock_t *lock = NULL;
+ lock = &local->inode_ctx->lock[local->transaction.type];
+
+ /* Lets say mount1 has eager-lock(full-lock) and after the eager-lock
+ * is taken mount2 opened the same file, it won't be able to
+ * perform any {meta,}data operations until mount1 releases eager-lock.
+ * To avoid such scenario do not enable eager-lock for this transaction
+ * if open-fd-count is > 1 for metadata transactions and if num-inodelks > 1
+ * for data transactions
+ */
+
+ if (local->transaction.type == AFR_METADATA_TRANSACTION) {
+ if (local->inode_ctx->open_fd_count > 1) {
+ return _gf_true;
+ }
+ } else if (local->transaction.type == AFR_DATA_TRANSACTION) {
+ if (lock->num_inodelks > 1) {
+ return _gf_true;
+ }
+ }
- res = _gf_true;
-out:
- return res;
+ return _gf_false;
}
+gf_boolean_t
+afr_is_delayed_changelog_post_op_needed(call_frame_t *frame, xlator_t *this,
+ int delay)
+{
+ afr_local_t *local = NULL;
+ afr_lock_t *lock = NULL;
+ gf_boolean_t res = _gf_false;
+
+ local = frame->local;
+ lock = &local->inode_ctx->lock[local->transaction.type];
+
+ if (!afr_txn_nothing_failed(frame, this)) {
+ lock->release = _gf_true;
+ goto out;
+ }
+
+ if (afr_are_conflicting_ops_waiting(local, this)) {
+ lock->release = _gf_true;
+ goto out;
+ }
+
+ if (!list_empty(&lock->owners))
+ goto out;
+ else
+ GF_ASSERT(list_empty(&lock->waiting));
+
+ if (lock->release) {
+ goto out;
+ }
+
+ if (!delay) {
+ goto out;
+ }
+
+ if (local->transaction.disable_delayed_post_op) {
+ goto out;
+ }
+
+ if ((local->op != GF_FOP_WRITE) && (local->op != GF_FOP_FXATTROP) &&
+ (local->op != GF_FOP_FSYNC)) {
+ /*Only allow writes/fsyncs but shard does [f]xattrops on writes, so
+ * they are fine too*/
+ goto out;
+ }
+
+ res = _gf_true;
+out:
+ return res;
+}
void
-afr_delayed_changelog_post_op (xlator_t *this, call_frame_t *frame, fd_t *fd,
- call_stub_t *stub);
-
-void
-afr_delayed_changelog_wake_up_cbk (void *data)
+afr_delayed_changelog_wake_up_cbk(void *data)
{
- fd_t *fd = NULL;
-
- fd = data;
-
- afr_delayed_changelog_wake_up (THIS, fd);
+ afr_lock_t *lock = NULL;
+ afr_local_t *local = data;
+ afr_local_t *timer_local = NULL;
+ struct list_head shared;
+
+ INIT_LIST_HEAD(&shared);
+ lock = &local->inode_ctx->lock[local->transaction.type];
+ LOCK(&local->inode->lock);
+ {
+ timer_local = list_entry(lock->post_op.next, afr_local_t,
+ transaction.owner_list);
+ if (list_empty(&lock->owners) && (local == timer_local)) {
+ GF_ASSERT(list_empty(&lock->waiting));
+ /*Last owner*/
+ lock->release = _gf_true;
+ lock->delay_timer = NULL;
+ }
+ }
+ UNLOCK(&local->inode->lock);
+ afr_changelog_post_op_now(local->transaction.frame,
+ local->transaction.frame->this);
}
-
/* SET operation */
int
-afr_fd_report_unstable_write (xlator_t *this, fd_t *fd)
+afr_fd_report_unstable_write(xlator_t *this, afr_local_t *local)
{
- afr_fd_ctx_t *fdctx = NULL;
-
- fdctx = afr_fd_ctx_get (fd, this);
-
- LOCK(&fd->lock);
- {
- fdctx->witnessed_unstable_write = _gf_true;
- }
- UNLOCK(&fd->lock);
+ LOCK(&local->inode->lock);
+ {
+ local->inode_ctx->witnessed_unstable_write = _gf_true;
+ }
+ UNLOCK(&local->inode->lock);
- return 0;
+ return 0;
}
/* TEST and CLEAR operation */
gf_boolean_t
-afr_fd_has_witnessed_unstable_write (xlator_t *this, fd_t *fd)
+afr_fd_has_witnessed_unstable_write(xlator_t *this, inode_t *inode)
{
- afr_fd_ctx_t *fdctx = NULL;
- gf_boolean_t witness = _gf_false;
+ afr_inode_ctx_t *ctx = NULL;
+ gf_boolean_t witness = _gf_false;
- fdctx = afr_fd_ctx_get (fd, this);
- if (!fdctx)
- return _gf_true;
+ LOCK(&inode->lock);
+ {
+ (void)__afr_inode_ctx_get(this, inode, &ctx);
- LOCK(&fd->lock);
- {
- if (fdctx->witnessed_unstable_write) {
- witness = _gf_true;
- fdctx->witnessed_unstable_write = _gf_false;
- }
+ if (ctx->witnessed_unstable_write) {
+ witness = _gf_true;
+ ctx->witnessed_unstable_write = _gf_false;
}
- UNLOCK (&fd->lock);
+ }
+ UNLOCK(&inode->lock);
- return witness;
+ return witness;
}
-
int
-afr_changelog_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *pre,
- struct iatt *post, dict_t *xdata)
+afr_changelog_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
{
- afr_private_t *priv = NULL;
- int child_index = (long) cookie;
- int call_count = -1;
- afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int child_index = (long)cookie;
+ int call_count = -1;
+ afr_local_t *local = NULL;
- priv = this->private;
- local = frame->local;
+ priv = this->private;
+ local = frame->local;
- if (op_ret != 0) {
- /* Failure of fsync() is as good as failure of previous
- write(). So treat it like one.
- */
- gf_log (this->name, GF_LOG_WARNING,
- "fsync(%s) failed on subvolume %s. Transaction was %s",
- uuid_utoa (local->fd->inode->gfid),
- priv->children[child_index]->name,
- gf_fop_list[local->op]);
+ if (op_ret != 0) {
+ /* Failure of fsync() is as good as failure of previous
+ write(). So treat it like one.
+ */
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, AFR_MSG_FSYNC_FAILED,
+ "fsync(%s) failed on subvolume %s. Transaction was %s",
+ uuid_utoa(local->fd->inode->gfid),
+ priv->children[child_index]->name, gf_fop_list[local->op]);
- afr_transaction_fop_failed (frame, this, child_index);
- }
+ afr_transaction_fop_failed(frame, this, child_index);
+ }
- call_count = afr_frame_return (frame);
+ call_count = afr_frame_return(frame);
- if (call_count == 0)
- afr_changelog_post_op_now (frame, this);
+ if (call_count == 0)
+ afr_changelog_post_op_now(frame, this);
- return 0;
+ return 0;
}
-
int
-afr_changelog_fsync (call_frame_t *frame, xlator_t *this)
+afr_changelog_fsync(call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- int i = 0;
- int call_count = 0;
- afr_private_t *priv = NULL;
- dict_t *xdata = NULL;
- GF_UNUSED int ret = -1;
+ afr_local_t *local = NULL;
+ int i = 0;
+ int call_count = 0;
+ afr_private_t *priv = NULL;
+ dict_t *xdata = NULL;
+ GF_UNUSED int ret = -1;
- local = frame->local;
- priv = this->private;
+ local = frame->local;
+ priv = this->private;
- call_count = AFR_COUNT (local->transaction.pre_op, priv->child_count);
+ call_count = AFR_COUNT(local->transaction.pre_op, priv->child_count);
- if (!call_count) {
- /* will go straight to unlock */
- afr_changelog_post_op_now (frame, this);
- return 0;
- }
+ if (!call_count) {
+ /* will go straight to unlock */
+ afr_changelog_post_op_now(frame, this);
+ return 0;
+ }
- local->call_count = call_count;
+ local->call_count = call_count;
- xdata = dict_new();
- if (xdata)
- ret = dict_set_int32 (xdata, "batch-fsync", 1);
+ xdata = dict_new();
+ if (xdata) {
+ ret = dict_set_int32_sizen(xdata, "batch-fsync", 1);
+ ret = dict_set_str(xdata, GLUSTERFS_INTERNAL_FOP_KEY, "yes");
+ }
- for (i = 0; i < priv->child_count; i++) {
- if (!local->transaction.pre_op[i])
- continue;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->transaction.pre_op[i])
+ continue;
- STACK_WIND_COOKIE (frame, afr_changelog_fsync_cbk,
- (void *) (long) i, priv->children[i],
- priv->children[i]->fops->fsync, local->fd,
- 1, xdata);
- if (!--call_count)
- break;
- }
+ STACK_WIND_COOKIE(frame, afr_changelog_fsync_cbk, (void *)(long)i,
+ priv->children[i], priv->children[i]->fops->fsync,
+ local->fd, 1, xdata);
+ if (!--call_count)
+ break;
+ }
- if (xdata)
- dict_unref (xdata);
+ if (xdata)
+ dict_unref(xdata);
- return 0;
+ return 0;
}
-
int
-afr_changelog_post_op_safe (call_frame_t *frame, xlator_t *this)
+afr_changelog_post_op_safe(call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- local = frame->local;
- priv = this->private;
+ local = frame->local;
+ priv = this->private;
- if (!local->fd || local->transaction.type != AFR_DATA_TRANSACTION) {
- afr_changelog_post_op_now (frame, this);
- return 0;
- }
-
- if (afr_changelog_pre_op_uninherit (frame, this) &&
- afr_txn_nothing_failed (frame, this)) {
- /* just detected that this post-op is about to
- be optimized away as a new write() has
- already piggybacked on this frame's changelog.
- */
- afr_changelog_post_op_now (frame, this);
- return 0;
- }
+ if (!local->fd || local->transaction.type != AFR_DATA_TRANSACTION) {
+ afr_changelog_post_op_now(frame, this);
+ return 0;
+ }
- /* Calling afr_changelog_post_op_now() now will result in
- issuing ->[f]xattrop().
-
- Performing a hard POST-OP (->[f]xattrop() FOP) is a more
- responsible operation that what it might appear on the surface.
-
- The changelog of a file (in the xattr of the file on the server)
- stores information (pending count) about the state of the file
- on the OTHER server. This changelog is blindly trusted, and must
- therefore be updated in such a way it remains trustworthy. This
- implies that decrementing the pending count (essentially "clearing
- the dirty flag") must be done STRICTLY after we are sure that the
- operation on the other server has reached stable storage.
-
- While the backend filesystem on that server will eventually flush
- it to stable storage, we (being in userspace) have no mechanism
- to get notified when the write became "stable".
-
- This means we need take matter into our own hands and issue an
- fsync() EVEN IF THE APPLICATION WAS PERFORMING UNSTABLE WRITES,
- and get an acknowledgement for it. And we need to wait for the
- fsync() acknowledgement before initiating the hard POST-OP.
-
- However if the FD itself was opened in O_SYNC or O_DSYNC then
- we are already guaranteed that the writes were made stable as
- part of the FOP itself. The same holds true for NFS stable
- writes which happen on an anonymous FD with O_DSYNC or O_SYNC
- flag set in the writev() @flags param. For all other write types,
- mark a flag in the fdctx whenever an unstable write is witnessed.
+ if (afr_changelog_pre_op_uninherit(frame, this) &&
+ afr_txn_nothing_failed(frame, this)) {
+ /* just detected that this post-op is about to
+ be optimized away as a new write() has
+ already piggybacked on this frame's changelog.
*/
-
- if (!afr_fd_has_witnessed_unstable_write (this, local->fd)) {
- afr_changelog_post_op_now (frame, this);
- return 0;
- }
-
- /* Check whether users want durability and perform fsync/post-op
- * accordingly.
- */
- if (priv->ensure_durability) {
- /* Time to fsync() */
- afr_changelog_fsync (frame, this);
- } else {
- afr_changelog_post_op_now (frame, this);
- }
-
+ afr_changelog_post_op_now(frame, this);
+ return 0;
+ }
+
+ /* Calling afr_changelog_post_op_now() now will result in
+ issuing ->[f]xattrop().
+
+ Performing a hard POST-OP (->[f]xattrop() FOP) is a more
+ responsible operation that what it might appear on the surface.
+
+ The changelog of a file (in the xattr of the file on the server)
+ stores information (pending count) about the state of the file
+ on the OTHER server. This changelog is blindly trusted, and must
+ therefore be updated in such a way it remains trustworthy. This
+ implies that decrementing the pending count (essentially "clearing
+ the dirty flag") must be done STRICTLY after we are sure that the
+ operation on the other server has reached stable storage.
+
+ While the backend filesystem on that server will eventually flush
+ it to stable storage, we (being in userspace) have no mechanism
+ to get notified when the write became "stable".
+
+ This means we need take matter into our own hands and issue an
+ fsync() EVEN IF THE APPLICATION WAS PERFORMING UNSTABLE WRITES,
+ and get an acknowledgement for it. And we need to wait for the
+ fsync() acknowledgement before initiating the hard POST-OP.
+
+ However if the FD itself was opened in O_SYNC or O_DSYNC then
+ we are already guaranteed that the writes were made stable as
+ part of the FOP itself. The same holds true for NFS stable
+ writes which happen on an anonymous FD with O_DSYNC or O_SYNC
+ flag set in the writev() @flags param. For all other write types,
+ mark a flag in the fdctx whenever an unstable write is witnessed.
+ */
+
+ if (!afr_fd_has_witnessed_unstable_write(this, local->inode)) {
+ afr_changelog_post_op_now(frame, this);
return 0;
+ }
+
+ /* Check whether users want durability and perform fsync/post-op
+ * accordingly.
+ */
+ if (priv->ensure_durability) {
+ /* Time to fsync() */
+ afr_changelog_fsync(frame, this);
+ } else {
+ afr_changelog_post_op_now(frame, this);
+ }
+
+ return 0;
}
-
void
-afr_delayed_changelog_post_op (xlator_t *this, call_frame_t *frame, fd_t *fd,
- call_stub_t *stub)
+afr_changelog_post_op(call_frame_t *frame, xlator_t *this)
{
- afr_fd_ctx_t *fd_ctx = NULL;
- call_frame_t *prev_frame = NULL;
- struct timespec delta = {0, };
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
-
- priv = this->private;
-
- fd_ctx = afr_fd_ctx_get (fd, this);
- if (!fd_ctx)
- goto out;
+ struct timespec delta = {
+ 0,
+ };
+ afr_private_t *priv = NULL;
+ afr_local_t *local = frame->local;
+ afr_lock_t *lock = NULL;
+ gf_boolean_t post_op = _gf_true;
+ struct list_head shared;
+
+ priv = this->private;
+ delta.tv_sec = priv->post_op_delay_secs;
+ delta.tv_nsec = 0;
+
+ INIT_LIST_HEAD(&shared);
+ if (!local->transaction.eager_lock_on)
+ goto out;
+
+ lock = &local->inode_ctx->lock[local->transaction.type];
+ LOCK(&local->inode->lock);
+ {
+ list_del_init(&local->transaction.owner_list);
+ list_add(&local->transaction.owner_list, &lock->post_op);
+ __afr_transaction_wake_shared(local, &shared);
+
+ if (!afr_is_delayed_changelog_post_op_needed(frame, this,
+ delta.tv_sec)) {
+ if (list_empty(&lock->owners))
+ lock->release = _gf_true;
+ goto unlock;
+ }
- delta.tv_sec = priv->post_op_delay_secs;
- delta.tv_nsec = 0;
-
- pthread_mutex_lock (&fd_ctx->delay_lock);
- {
- prev_frame = fd_ctx->delay_frame;
- fd_ctx->delay_frame = NULL;
- if (fd_ctx->delay_timer)
- gf_timer_call_cancel (this->ctx, fd_ctx->delay_timer);
- fd_ctx->delay_timer = NULL;
- if (!frame)
- goto unlock;
- fd_ctx->delay_timer = gf_timer_call_after (this->ctx, delta,
- afr_delayed_changelog_wake_up_cbk,
- fd);
- fd_ctx->delay_frame = frame;
- }
+ GF_ASSERT(lock->delay_timer == NULL);
+ lock->delay_timer = gf_timer_call_after(
+ this->ctx, delta, afr_delayed_changelog_wake_up_cbk, local);
+ if (!lock->delay_timer) {
+ lock->release = _gf_true;
+ } else {
+ post_op = _gf_false;
+ }
+ }
unlock:
- pthread_mutex_unlock (&fd_ctx->delay_lock);
-
-out:
- if (prev_frame) {
- local = prev_frame->local;
- local->transaction.resume_stub = stub;
- afr_changelog_post_op_now (prev_frame, this);
- } else if (stub) {
- call_resume (stub);
- }
-}
-
-
-void
-afr_changelog_post_op (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- if (is_afr_delayed_changelog_post_op_needed (frame, this))
- afr_delayed_changelog_post_op (this, frame, local->fd, NULL);
- else
- afr_changelog_post_op_safe (frame, this);
-}
+ UNLOCK(&local->inode->lock);
+ if (!list_empty(&shared)) {
+ afr_lock_resume_shared(&shared);
+ }
-
-/* Wake up the sleeping/delayed post-op, and also register
- a stub to have it resumed after this transaction
- completely finishes.
-
- The @stub gets saved in @local and gets resumed in
- afr_local_cleanup()
- */
-void
-afr_delayed_changelog_wake_resume (xlator_t *this, fd_t *fd, call_stub_t *stub)
-{
- afr_delayed_changelog_post_op (this, NULL, fd, stub);
-}
-
-
-void
-afr_delayed_changelog_wake_up (xlator_t *this, fd_t *fd)
-{
- afr_delayed_changelog_post_op (this, NULL, fd, NULL);
+out:
+ if (post_op) {
+ if (!local->transaction.eager_lock_on || lock->release) {
+ afr_changelog_post_op_safe(frame, this);
+ } else {
+ afr_changelog_post_op_now(frame, this);
+ }
+ }
}
-
int
-afr_transaction_resume (call_frame_t *frame, xlator_t *this)
+afr_transaction_resume(call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
+ afr_local_t *local = NULL;
- local = frame->local;
+ local = frame->local;
- if (local->transaction.eager_lock_on) {
- /* We don't need to retain "local" in the
- fd list anymore, writes to all subvols
- are finished by now */
- afr_remove_eager_lock_stub (local);
- }
-
- afr_restore_lk_owner (frame);
+ afr_restore_lk_owner(frame);
- afr_handle_symmetric_errors (frame, this);
+ afr_handle_symmetric_errors(frame, this);
- if (!local->pre_op_compat)
- /* new mode, pre-op was done along
- with OP */
- afr_changelog_pre_op_update (frame, this);
+ if (!local->pre_op_compat)
+ /* new mode, pre-op was done along
+ with OP */
+ afr_changelog_pre_op_update(frame, this);
- if (__fop_changelog_needed (frame, this)) {
- afr_changelog_post_op (frame, this);
- } else {
- afr_changelog_post_op_done (frame, this);
- }
+ afr_changelog_post_op(frame, this);
- return 0;
+ return 0;
}
-
/**
* afr_transaction_fop_failed - inform that an fop failed
*/
void
-afr_transaction_fop_failed (call_frame_t *frame, xlator_t *this,
- int child_index)
+afr_transaction_fop_failed(call_frame_t *frame, xlator_t *this, int child_index)
{
- afr_local_t * local = NULL;
+ afr_local_t *local = NULL;
- local = frame->local;
+ local = frame->local;
- local->transaction.failed_subvols[child_index] = 1;
+ local->transaction.failed_subvols[child_index] = 1;
}
-
-
- static gf_boolean_t
-afr_locals_overlap (afr_local_t *local1, afr_local_t *local2)
+static gf_boolean_t
+__need_previous_lock_unlocked(afr_local_t *local)
{
- uint64_t start1 = local1->transaction.start;
- uint64_t start2 = local2->transaction.start;
- uint64_t end1 = 0;
- uint64_t end2 = 0;
+ afr_lock_t *lock = NULL;
- if (local1->transaction.len)
- end1 = start1 + local1->transaction.len - 1;
- else
- end1 = ULLONG_MAX;
-
- if (local2->transaction.len)
- end2 = start2 + local2->transaction.len - 1;
- else
- end2 = ULLONG_MAX;
-
- return ((end1 >= start2) && (end2 >= start1));
+ lock = &local->inode_ctx->lock[local->transaction.type];
+ if (!lock->acquired)
+ return _gf_false;
+ if (lock->acquired && lock->event_generation != local->event_generation)
+ return _gf_true;
+ return _gf_false;
}
void
-afr_transaction_eager_lock_init (afr_local_t *local, xlator_t *this)
+__afr_eager_lock_handle(afr_local_t *local, gf_boolean_t *take_lock,
+ gf_boolean_t *do_pre_op, afr_local_t **timer_local)
{
- afr_private_t *priv = NULL;
- afr_fd_ctx_t *fdctx = NULL;
- afr_local_t *each = NULL;
-
- priv = this->private;
-
- if (!local->fd)
- return;
-
- if (local->transaction.type != AFR_DATA_TRANSACTION)
- return;
-
- if (!priv->eager_lock)
- return;
-
- fdctx = afr_fd_ctx_get (local->fd, this);
- if (!fdctx)
- return;
-
- if (afr_are_multiple_fds_opened (local->fd, this))
- return;
- /*
- * Once full file lock is acquired in eager-lock phase, overlapping
- * writes do not compete for inode-locks, instead are transferred to the
- * next writes. Because of this overlapping writes are not ordered.
- * This can cause inconsistencies in replication.
- * Example:
- * Two overlapping writes w1, w2 are sent in parallel on same fd
- * in two threads t1, t2.
- * Both threads can execute afr_writev_wind in the following manner.
- * t1 winds w1 on brick-0
- * t2 winds w2 on brick-0
- * t2 winds w2 on brick-1
- * t1 winds w1 on brick-1
- *
- * This check makes sure the locks are not transferred for
- * overlapping writes.
- */
- LOCK (&local->fd->lock);
- {
- list_for_each_entry (each, &fdctx->eager_locked,
- transaction.eager_locked) {
- if (afr_locals_overlap (each, local)) {
- local->transaction.eager_lock_on = _gf_false;
- goto unlock;
- }
- }
+ afr_lock_t *lock = NULL;
+ afr_local_t *owner_local = NULL;
+ xlator_t *this = local->transaction.frame->this;
+
+ local->transaction.eager_lock_on = _gf_true;
+ afr_set_lk_owner(local->transaction.frame, this, local->inode);
+
+ lock = &local->inode_ctx->lock[local->transaction.type];
+ if (__need_previous_lock_unlocked(local)) {
+ if (!list_empty(&lock->owners)) {
+ lock->release = _gf_true;
+ } else if (lock->delay_timer) {
+ lock->release = _gf_true;
+ if (gf_timer_call_cancel(this->ctx, lock->delay_timer)) {
+ /* It will be put in frozen list
+ * in the code flow below*/
+ } else {
+ *timer_local = list_entry(lock->post_op.next, afr_local_t,
+ transaction.owner_list);
+ lock->delay_timer = NULL;
+ }
+ }
+ }
+
+ if (lock->release) {
+ list_add_tail(&local->transaction.wait_list, &lock->frozen);
+ *take_lock = _gf_false;
+ goto out;
+ }
+
+ if (lock->delay_timer) {
+ *take_lock = _gf_false;
+ if (gf_timer_call_cancel(this->ctx, lock->delay_timer)) {
+ list_add_tail(&local->transaction.wait_list, &lock->frozen);
+ } else {
+ *timer_local = list_entry(lock->post_op.next, afr_local_t,
+ transaction.owner_list);
+ afr_copy_inodelk_vars(&local->internal_lock,
+ &(*timer_local)->internal_lock, this, 0);
+ lock->delay_timer = NULL;
+ *do_pre_op = _gf_true;
+ list_add_tail(&local->transaction.owner_list, &lock->owners);
+ }
+ goto out;
+ }
- local->transaction.eager_lock_on = _gf_true;
- list_add_tail (&local->transaction.eager_locked,
- &fdctx->eager_locked);
+ if (!list_empty(&lock->owners)) {
+ if (!lock->acquired || afr_has_lock_conflict(local, _gf_true)) {
+ list_add_tail(&local->transaction.wait_list, &lock->waiting);
+ *take_lock = _gf_false;
+ goto out;
}
-unlock:
- UNLOCK (&local->fd->lock);
+ owner_local = list_entry(lock->owners.next, afr_local_t,
+ transaction.owner_list);
+ afr_copy_inodelk_vars(&local->internal_lock,
+ &owner_local->internal_lock, this, 0);
+ *take_lock = _gf_false;
+ *do_pre_op = _gf_true;
+ }
+
+ if (lock->acquired)
+ GF_ASSERT(!(*take_lock));
+ list_add_tail(&local->transaction.owner_list, &lock->owners);
+out:
+ return;
}
+void
+afr_transaction_start(afr_local_t *local, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ gf_boolean_t take_lock = _gf_true;
+ gf_boolean_t do_pre_op = _gf_false;
+ afr_local_t *timer_local = NULL;
+
+ priv = this->private;
+
+ if (local->transaction.type != AFR_DATA_TRANSACTION &&
+ local->transaction.type != AFR_METADATA_TRANSACTION)
+ goto lock_phase;
+
+ if (!priv->eager_lock)
+ goto lock_phase;
+
+ LOCK(&local->inode->lock);
+ {
+ __afr_eager_lock_handle(local, &take_lock, &do_pre_op, &timer_local);
+ }
+ UNLOCK(&local->inode->lock);
+lock_phase:
+ if (!local->transaction.eager_lock_on) {
+ afr_set_lk_owner(local->transaction.frame, this,
+ local->transaction.frame->root);
+ }
+
+ if (take_lock) {
+ afr_lock(local->transaction.frame, this);
+ } else if (do_pre_op) {
+ afr_changelog_pre_op(local->transaction.frame, this);
+ }
+ /*Always call delayed_changelog_wake_up_cbk after calling pre-op above
+ * so that any inheriting can happen*/
+ if (timer_local)
+ afr_delayed_changelog_wake_up_cbk(timer_local);
+}
int
-afr_transaction (call_frame_t *frame, xlator_t *this, afr_transaction_type type)
+afr_write_txn_refresh_done(call_frame_t *frame, xlator_t *this, int err)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- fd_t *fd = NULL;
- int ret = -1;
-
- local = frame->local;
- priv = this->private;
-
- local->transaction.resume = afr_transaction_resume;
- local->transaction.type = type;
-
- ret = afr_transaction_local_init (local, this);
- if (ret < 0)
- goto out;
+ afr_local_t *local = frame->local;
+
+ if (err) {
+ AFR_SET_ERROR_AND_CHECK_SPLIT_BRAIN(-1, err);
+ goto fail;
+ }
+
+ afr_transaction_start(local, this);
+ return 0;
+fail:
+ local->transaction.unwind(frame, this);
+ AFR_STACK_DESTROY(frame);
+ return 0;
+}
- afr_transaction_eager_lock_init (local, this);
+int
+afr_transaction_lockee_init(call_frame_t *frame)
+{
+ afr_local_t *local = frame->local;
+ afr_internal_lock_t *int_lock = &local->internal_lock;
+ afr_private_t *priv = frame->this->private;
+ int ret = 0;
- if (local->fd && local->transaction.eager_lock_on)
- afr_set_lk_owner (frame, this, local->fd);
- else
- afr_set_lk_owner (frame, this, frame->root);
+ switch (local->transaction.type) {
+ case AFR_DATA_TRANSACTION:
+ case AFR_METADATA_TRANSACTION:
+ ret = afr_add_inode_lockee(local, priv->child_count);
+ break;
- if (!local->transaction.eager_lock_on && local->loc.inode) {
- fd = fd_lookup (local->loc.inode, frame->root->pid);
- if (fd == NULL)
- fd = fd_lookup_anonymous (local->loc.inode);
+ case AFR_ENTRY_TRANSACTION:
+ case AFR_ENTRY_RENAME_TRANSACTION:
+ ret = afr_add_entry_lockee(local, &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret) {
+ goto out;
+ }
+ if (local->op == GF_FOP_RENAME) {
+ ret = afr_add_entry_lockee(
+ local, &local->transaction.new_parent_loc,
+ local->transaction.new_basename, priv->child_count);
+ if (ret) {
+ goto out;
+ }
- if (fd) {
- afr_delayed_changelog_wake_up (this, fd);
- fd_unref (fd);
+ if (local->newloc.inode &&
+ IA_ISDIR(local->newloc.inode->ia_type)) {
+ ret = afr_add_entry_lockee(local, &local->newloc, NULL,
+ priv->child_count);
+ if (ret) {
+ goto out;
+ }
}
- }
+ } else if (local->op == GF_FOP_RMDIR) {
+ ret = afr_add_entry_lockee(local, &local->loc, NULL,
+ priv->child_count);
+ if (ret) {
+ goto out;
+ }
+ }
+
+ if (int_lock->lockee_count > 1) {
+ qsort(int_lock->lockee, int_lock->lockee_count,
+ sizeof(*int_lock->lockee), afr_entry_lockee_cmp);
+ }
+ break;
+ }
+out:
+ return ret;
+}
- if (afr_lock_server_count (priv, local->transaction.type) == 0) {
- afr_internal_lock_finish (frame, this);
- } else {
- afr_lock (frame, this);
- }
+int
+afr_transaction(call_frame_t *frame, xlator_t *this, afr_transaction_type type)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int ret = -1;
+ int event_generation = 0;
+
+ local = frame->local;
+ priv = this->private;
+ local->transaction.frame = frame;
+
+ local->transaction.type = type;
+
+ if (priv->quorum_count && !afr_has_quorum(local->child_up, this, NULL)) {
+ ret = -afr_quorum_errno(priv);
+ goto out;
+ }
+
+ if (!afr_is_consistent_io_possible(local, priv, &ret)) {
+ ret = -ret; /*op_errno to ret conversion*/
+ goto out;
+ }
+
+ if (priv->thin_arbiter_count && !afr_ta_has_quorum(priv, local)) {
+ ret = -afr_quorum_errno(priv);
+ goto out;
+ }
+
+ ret = afr_transaction_local_init(local, this);
+ if (ret < 0)
+ goto out;
+
+ ret = afr_transaction_lockee_init(frame);
+ if (ret)
+ goto out;
+
+ if (type != AFR_METADATA_TRANSACTION) {
+ goto txn_start;
+ }
+
+ ret = afr_inode_get_readable(frame, local->inode, this, local->readable,
+ &event_generation, type);
+ if (ret < 0 ||
+ afr_is_inode_refresh_reqd(local->inode, this, priv->event_generation,
+ event_generation)) {
+ afr_inode_refresh(frame, this, local->inode, local->loc.gfid,
+ afr_write_txn_refresh_done);
ret = 0;
+ goto out;
+ }
+
+txn_start:
+ ret = 0;
+ afr_transaction_start(local, this);
out:
- return ret;
+ return ret;
}
diff --git a/xlators/cluster/afr/src/afr-transaction.h b/xlators/cluster/afr/src/afr-transaction.h
index c3ce333b771..beefa26f4a6 100644
--- a/xlators/cluster/afr/src/afr-transaction.h
+++ b/xlators/cluster/afr/src/afr-transaction.h
@@ -14,41 +14,62 @@
#include "afr.h"
void
-afr_transaction_fop_failed (call_frame_t *frame, xlator_t *this,
- int child_index);
+afr_transaction_fop_failed(call_frame_t *frame, xlator_t *this,
+ int child_index);
+
+int32_t
+afr_transaction(call_frame_t *frame, xlator_t *this, afr_transaction_type type);
int
-afr_lock_server_count (afr_private_t *priv, afr_transaction_type type);
+afr_set_pending_dict(afr_private_t *priv, dict_t *xattr, int32_t **pending);
-afr_inodelk_t*
-afr_get_inodelk (afr_internal_lock_t *int_lock, char *dom);
+void
+afr_delayed_changelog_wake_up(xlator_t *this, fd_t *fd);
-int32_t
-afr_transaction (call_frame_t *frame, xlator_t *this, afr_transaction_type type);
+void
+__mark_all_success(call_frame_t *frame, xlator_t *this);
+
+gf_boolean_t
+afr_txn_nothing_failed(call_frame_t *frame, xlator_t *this);
int
-afr_set_pending_dict (afr_private_t *priv, dict_t *xattr, int32_t **pending);
+afr_read_txn(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ afr_read_txn_wind_t readfn, afr_transaction_type type);
-void
-afr_set_delayed_post_op (call_frame_t *frame, xlator_t *this);
+int
+afr_read_txn_continue(call_frame_t *frame, xlator_t *this, int subvol);
void
-afr_delayed_changelog_wake_up (xlator_t *this, fd_t *fd);
+afr_pending_read_increment(afr_private_t *priv, int child_index);
void
-__mark_all_success (call_frame_t *frame, xlator_t *this);
+afr_pending_read_decrement(afr_private_t *priv, int child_index);
+call_frame_t *
+afr_transaction_detach_fop_frame(call_frame_t *frame);
gf_boolean_t
-afr_txn_nothing_failed (call_frame_t *frame, xlator_t *this);
+afr_has_quorum(unsigned char *subvols, xlator_t *this, call_frame_t *frame);
+gf_boolean_t
+afr_needs_changelog_update(afr_local_t *local);
+void
+afr_zero_fill_stat(afr_local_t *local);
-int afr_read_txn (call_frame_t *frame, xlator_t *this, inode_t *inode,
- afr_read_txn_wind_t readfn, afr_transaction_type type);
+void
+afr_pick_error_xdata(afr_local_t *local, afr_private_t *priv, inode_t *inode1,
+ unsigned char *readable1, inode_t *inode2,
+ unsigned char *readable2);
+int
+afr_transaction_resume(call_frame_t *frame, xlator_t *this);
-int afr_read_txn_continue (call_frame_t *frame, xlator_t *this, int subvol);
+int
+afr_lock(call_frame_t *frame, xlator_t *this);
-int __afr_txn_write_fop (call_frame_t *frame, xlator_t *this);
-int __afr_txn_write_done (call_frame_t *frame, xlator_t *this);
-call_frame_t *afr_transaction_detach_fop_frame (call_frame_t *frame);
-gf_boolean_t afr_has_quorum (unsigned char *subvols, xlator_t *this);
+void
+afr_delayed_changelog_wake_up_cbk(void *data);
+
+int
+afr_release_notify_lock_for_ta(void *opaque);
+int
+afr_ta_lock_release_done(int ret, call_frame_t *ta_frame, void *opaque);
#endif /* __TRANSACTION_H__ */
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index f962fb6494e..df7366f0a65 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -15,784 +15,1330 @@
#include <stdlib.h>
#include <signal.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
#include "afr-common.c"
+#include "afr-messages.h"
struct volume_options options[];
+static char *afr_favorite_child_policies[AFR_FAV_CHILD_POLICY_MAX + 1] = {
+ [AFR_FAV_CHILD_NONE] = "none",
+ [AFR_FAV_CHILD_BY_SIZE] = "size",
+ [AFR_FAV_CHILD_BY_CTIME] = "ctime",
+ [AFR_FAV_CHILD_BY_MTIME] = "mtime",
+ [AFR_FAV_CHILD_BY_MAJORITY] = "majority",
+ [AFR_FAV_CHILD_POLICY_MAX] = NULL,
+};
+
int32_t
-notify (xlator_t *this, int32_t event,
- void *data, ...)
+notify(xlator_t *this, int32_t event, void *data, ...)
{
- int ret = -1;
- va_list ap;
- void *data2 = NULL;
+ int ret = -1;
+ va_list ap;
+ void *data2 = NULL;
- va_start (ap, data);
- data2 = va_arg (ap, dict_t*);
- va_end (ap);
- ret = afr_notify (this, event, data, data2);
+ va_start(ap, data);
+ data2 = va_arg(ap, dict_t *);
+ va_end(ap);
+ ret = afr_notify(this, event, data, data2);
- return ret;
+ return ret;
}
int32_t
-mem_acct_init (xlator_t *this)
+mem_acct_init(xlator_t *this)
{
- int ret = -1;
+ int ret = -1;
- if (!this)
- return ret;
-
- ret = xlator_mem_acct_init (this, gf_afr_mt_end + 1);
+ if (!this)
+ return ret;
- if (ret != 0) {
- gf_log(this->name, GF_LOG_ERROR, "Memory accounting init"
- "failed");
- return ret;
- }
+ ret = xlator_mem_acct_init(this, gf_afr_mt_end + 1);
+ if (ret != 0) {
return ret;
-}
+ }
+ return ret;
+}
int
-xlator_subvolume_index (xlator_t *this, xlator_t *subvol)
+xlator_subvolume_index(xlator_t *this, xlator_t *subvol)
{
- int index = -1;
- int i = 0;
- xlator_list_t *list = NULL;
-
- list = this->children;
-
- while (list) {
- if (subvol == list->xlator ||
- strcmp (subvol->name, list->xlator->name) == 0) {
- index = i;
- break;
- }
- list = list->next;
- i++;
+ int index = -1;
+ int i = 0;
+ xlator_list_t *list = NULL;
+
+ list = this->children;
+
+ while (list) {
+ if (subvol == list->xlator ||
+ strcmp(subvol->name, list->xlator->name) == 0) {
+ index = i;
+ break;
}
+ list = list->next;
+ i++;
+ }
- return index;
+ return index;
}
static void
-fix_quorum_options (xlator_t *this, afr_private_t *priv, char *qtype,
- dict_t *options)
+fix_quorum_options(xlator_t *this, afr_private_t *priv, char *qtype,
+ dict_t *options)
{
- if (dict_get (options, "quorum-type") == NULL) {
- /* If user doesn't configure anything enable auto-quorum if the
- * replica has odd number of subvolumes */
- if (priv->child_count % 2)
- qtype = "auto";
- }
-
- if (priv->quorum_count && strcmp (qtype, "fixed")) {
- gf_msg (this->name,GF_LOG_WARNING, 0, AFR_MSG_QUORUM_OVERRIDE,
- "quorum-type %s overriding quorum-count %u",
- qtype, priv->quorum_count);
- }
-
- if (!strcmp (qtype, "none")) {
- priv->quorum_count = 0;
- } else if (!strcmp (qtype, "auto")) {
- priv->quorum_count = AFR_QUORUM_AUTO;
- }
+ if (dict_get_sizen(options, "quorum-type") == NULL) {
+ /* If user doesn't configure anything enable auto-quorum if the
+ * replica has more than two subvolumes */
+ if (priv->child_count > 2)
+ qtype = "auto";
+ }
+
+ if (priv->quorum_count && strcmp(qtype, "fixed")) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_QUORUM_OVERRIDE,
+ "quorum-type %s overriding quorum-count %u", qtype,
+ priv->quorum_count);
+ }
+
+ if (!strcmp(qtype, "none")) {
+ priv->quorum_count = 0;
+ } else if (!strcmp(qtype, "auto")) {
+ priv->quorum_count = AFR_QUORUM_AUTO;
+ }
}
int
-reconfigure (xlator_t *this, dict_t *options)
+afr_set_favorite_child_policy(afr_private_t *priv, char *policy)
{
- afr_private_t *priv = NULL;
- xlator_t *read_subvol = NULL;
- int read_subvol_index = -1;
- int ret = -1;
- int index = -1;
- char *qtype = NULL;
+ int index = -1;
- priv = this->private;
+ index = gf_get_index_by_elem(afr_favorite_child_policies, policy);
+ if (index < 0 || index >= AFR_FAV_CHILD_POLICY_MAX)
+ return -1;
- GF_OPTION_RECONF ("afr-dirty-xattr",
- priv->afr_dirty, options, str,
- out);
+ priv->fav_child_policy = index;
- GF_OPTION_RECONF ("metadata-splitbrain-forced-heal",
- priv->metadata_splitbrain_forced_heal, options, bool,
- out);
-
- GF_OPTION_RECONF ("background-self-heal-count",
- priv->background_self_heal_count, options, uint32,
- out);
-
- GF_OPTION_RECONF ("metadata-self-heal",
- priv->metadata_self_heal, options, bool, out);
+ return 0;
+}
- GF_OPTION_RECONF ("data-self-heal", priv->data_self_heal, options, str,
- out);
+static void
+set_data_self_heal_algorithm(afr_private_t *priv, char *algo)
+{
+ if (!algo) {
+ priv->data_self_heal_algorithm = AFR_SELFHEAL_DATA_DYNAMIC;
+ } else if (strcmp(algo, "full") == 0) {
+ priv->data_self_heal_algorithm = AFR_SELFHEAL_DATA_FULL;
+ } else if (strcmp(algo, "diff") == 0) {
+ priv->data_self_heal_algorithm = AFR_SELFHEAL_DATA_DIFF;
+ } else {
+ priv->data_self_heal_algorithm = AFR_SELFHEAL_DATA_DYNAMIC;
+ }
+}
- GF_OPTION_RECONF ("entry-self-heal", priv->entry_self_heal, options,
- bool, out);
+void
+afr_handle_anon_inode_options(afr_private_t *priv, dict_t *options)
+{
+ char *volfile_id_str = NULL;
+ uuid_t anon_inode_gfid = {0};
+
+ /*If volume id is not present don't enable anything*/
+ if (dict_get_str(options, "volume-id", &volfile_id_str))
+ return;
+ GF_ASSERT(strlen(AFR_ANON_DIR_PREFIX) + strlen(volfile_id_str) <= NAME_MAX);
+ /*anon_inode_name is not supposed to change once assigned*/
+ if (!priv->anon_inode_name[0]) {
+ snprintf(priv->anon_inode_name, sizeof(priv->anon_inode_name), "%s-%s",
+ AFR_ANON_DIR_PREFIX, volfile_id_str);
+ gf_uuid_parse(volfile_id_str, anon_inode_gfid);
+ /*Flip a bit to make sure volfile-id and anon-gfid are not same*/
+ anon_inode_gfid[0] ^= 1;
+ uuid_utoa_r(anon_inode_gfid, priv->anon_gfid_str);
+ }
+}
- GF_OPTION_RECONF ("data-self-heal-window-size",
- priv->data_self_heal_window_size, options,
- uint32, out);
+int
+reconfigure(xlator_t *this, dict_t *options)
+{
+ afr_private_t *priv = NULL;
+ xlator_t *read_subvol = NULL;
+ int read_subvol_index = -1;
+ int timeout_old = 0;
+ int ret = -1;
+ int index = -1;
+ char *qtype = NULL;
+ char *fav_child_policy = NULL;
+ char *data_self_heal = NULL;
+ char *data_self_heal_algorithm = NULL;
+ char *locking_scheme = NULL;
+ gf_boolean_t consistent_io = _gf_false;
+ gf_boolean_t choose_local_old = _gf_false;
+ gf_boolean_t enabled_old = _gf_false;
- GF_OPTION_RECONF ("data-change-log", priv->data_change_log, options,
- bool, out);
+ priv = this->private;
- GF_OPTION_RECONF ("metadata-change-log",
- priv->metadata_change_log, options, bool, out);
+ GF_OPTION_RECONF("metadata-splitbrain-forced-heal",
+ priv->metadata_splitbrain_forced_heal, options, bool, out);
- GF_OPTION_RECONF ("entry-change-log", priv->entry_change_log, options,
- bool, out);
+ GF_OPTION_RECONF("background-self-heal-count",
+ priv->background_self_heal_count, options, uint32, out);
- GF_OPTION_RECONF ("data-self-heal-algorithm",
- priv->data_self_heal_algorithm, options, str, out);
+ GF_OPTION_RECONF("heal-wait-queue-length", priv->heal_wait_qlen, options,
+ uint32, out);
- GF_OPTION_RECONF ("read-subvolume", read_subvol, options, xlator, out);
+ GF_OPTION_RECONF("metadata-self-heal", priv->metadata_self_heal, options,
+ bool, out);
- GF_OPTION_RECONF ("read-hash-mode", priv->hash_mode,
- options, uint32, out);
+ GF_OPTION_RECONF("data-self-heal", data_self_heal, options, str, out);
+ if (gf_string2boolean(data_self_heal, &priv->data_self_heal) == -1)
+ goto out;
- if (read_subvol) {
- index = xlator_subvolume_index (this, read_subvol);
- if (index == -1) {
- gf_log (this->name, GF_LOG_ERROR, "%s not a subvolume",
- read_subvol->name);
- goto out;
- }
- priv->read_child = index;
- }
+ GF_OPTION_RECONF("entry-self-heal", priv->entry_self_heal, options, bool,
+ out);
- GF_OPTION_RECONF ("read-subvolume-index",read_subvol_index, options,int32,out);
+ GF_OPTION_RECONF("data-self-heal-window-size",
+ priv->data_self_heal_window_size, options, uint32, out);
- if (read_subvol_index >-1) {
- index=read_subvol_index;
- if (index >= priv->child_count) {
- gf_log (this->name, GF_LOG_ERROR, "%d not a subvolume-index",
- index);
- goto out;
- }
- priv->read_child = index;
- }
+ GF_OPTION_RECONF("data-self-heal-algorithm", data_self_heal_algorithm,
+ options, str, out);
+ set_data_self_heal_algorithm(priv, data_self_heal_algorithm);
- GF_OPTION_RECONF ("pre-op-compat", priv->pre_op_compat, options, bool, out);
+ GF_OPTION_RECONF("halo-enabled", priv->halo_enabled, options, bool, out);
- GF_OPTION_RECONF ("eager-lock", priv->eager_lock, options, bool, out);
- GF_OPTION_RECONF ("quorum-type", qtype, options, str, out);
- GF_OPTION_RECONF ("quorum-count", priv->quorum_count, options,
- uint32, out);
- fix_quorum_options (this, priv, qtype, options);
- if (priv->quorum_count && !afr_has_quorum (priv->child_up, this))
- gf_msg (this->name, GF_LOG_WARNING, 0, AFR_MSG_QUORUM_FAIL,
- "Client-quorum is not met");
+ GF_OPTION_RECONF("halo-shd-max-latency", priv->shd.halo_max_latency_msec,
+ options, uint32, out);
+ GF_OPTION_RECONF("halo-nfsd-max-latency", priv->nfsd.halo_max_latency_msec,
+ options, uint32, out);
- GF_OPTION_RECONF ("post-op-delay-secs", priv->post_op_delay_secs, options,
- uint32, out);
+ GF_OPTION_RECONF("halo-max-latency", priv->halo_max_latency_msec, options,
+ uint32, out);
- GF_OPTION_RECONF (AFR_SH_READDIR_SIZE_KEY, priv->sh_readdir_size,
- options, size_uint64, out);
- /* Reset this so we re-discover in case the topology changed. */
- GF_OPTION_RECONF ("ensure-durability", priv->ensure_durability, options,
- bool, out);
+ GF_OPTION_RECONF("halo-max-replicas", priv->halo_max_replicas, options,
+ uint32, out);
- GF_OPTION_RECONF ("self-heal-daemon", priv->shd.enabled, options,
- bool, out);
+ GF_OPTION_RECONF("halo-min-replicas", priv->halo_min_replicas, options,
+ uint32, out);
- GF_OPTION_RECONF ("iam-self-heal-daemon", priv->shd.iamshd, options,
- bool, out);
+ GF_OPTION_RECONF("read-subvolume", read_subvol, options, xlator, out);
- GF_OPTION_RECONF ("heal-timeout", priv->shd.timeout, options,
- int32, out);
+ choose_local_old = priv->choose_local;
+ GF_OPTION_RECONF("choose-local", priv->choose_local, options, bool, out);
- GF_OPTION_RECONF ("quorum-reads", priv->quorum_reads, options,
- bool, out);
- GF_OPTION_RECONF ("consistent-metadata", priv->consistent_metadata,
- options, bool, out);
+ if (choose_local_old != priv->choose_local) {
+ priv->read_child = -1;
+ if (choose_local_old == _gf_false)
+ priv->did_discovery = _gf_false;
+ }
+
+ GF_OPTION_RECONF("read-hash-mode", priv->hash_mode, options, uint32, out);
+
+ if (read_subvol) {
+ index = xlator_subvolume_index(this, read_subvol);
+ if (index == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_SUBVOL,
+ "%s not a subvolume", read_subvol->name);
+ goto out;
+ }
+ priv->read_child = index;
+ }
+
+ GF_OPTION_RECONF("read-subvolume-index", read_subvol_index, options, int32,
+ out);
+
+ if (read_subvol_index > -1) {
+ index = read_subvol_index;
+ if (index >= priv->child_count) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_SUBVOL,
+ "%d not a subvolume-index", index);
+ goto out;
+ }
+ priv->read_child = index;
+ }
+
+ GF_OPTION_RECONF("pre-op-compat", priv->pre_op_compat, options, bool, out);
+ GF_OPTION_RECONF("locking-scheme", locking_scheme, options, str, out);
+ priv->granular_locks = (strcmp(locking_scheme, "granular") == 0);
+ GF_OPTION_RECONF("full-lock", priv->full_lock, options, bool, out);
+ GF_OPTION_RECONF("granular-entry-heal", priv->esh_granular, options, bool,
+ out);
+
+ GF_OPTION_RECONF("eager-lock", priv->eager_lock, options, bool, out);
+ GF_OPTION_RECONF("optimistic-change-log", priv->optimistic_change_log,
+ options, bool, out);
+ GF_OPTION_RECONF("quorum-type", qtype, options, str, out);
+ GF_OPTION_RECONF("quorum-count", priv->quorum_count, options, uint32, out);
+ fix_quorum_options(this, priv, qtype, options);
+ if (priv->quorum_count && !afr_has_quorum(priv->child_up, this, NULL))
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_QUORUM_FAIL,
+ "Client-quorum is not met");
+
+ GF_OPTION_RECONF("post-op-delay-secs", priv->post_op_delay_secs, options,
+ uint32, out);
+
+ GF_OPTION_RECONF(AFR_SH_READDIR_SIZE_KEY, priv->sh_readdir_size, options,
+ size_uint64, out);
+ /* Reset this so we re-discover in case the topology changed. */
+ GF_OPTION_RECONF("ensure-durability", priv->ensure_durability, options,
+ bool, out);
+
+ enabled_old = priv->shd.enabled;
+ GF_OPTION_RECONF("self-heal-daemon", priv->shd.enabled, options, bool, out);
+
+ GF_OPTION_RECONF("iam-self-heal-daemon", priv->shd.iamshd, options, bool,
+ out);
+
+ timeout_old = priv->shd.timeout;
+ GF_OPTION_RECONF("heal-timeout", priv->shd.timeout, options, int32, out);
+
+ GF_OPTION_RECONF("consistent-metadata", priv->consistent_metadata, options,
+ bool, out);
+
+ GF_OPTION_RECONF("shd-max-threads", priv->shd.max_threads, options, uint32,
+ out);
+
+ GF_OPTION_RECONF("shd-wait-qlength", priv->shd.wait_qlength, options,
+ uint32, out);
+
+ GF_OPTION_RECONF("favorite-child-policy", fav_child_policy, options, str,
+ out);
+ if (afr_set_favorite_child_policy(priv, fav_child_policy) == -1)
+ goto out;
+
+ priv->did_discovery = _gf_false;
+
+ GF_OPTION_RECONF("consistent-io", consistent_io, options, bool, out);
+ if (priv->quorum_count != 0)
+ consistent_io = _gf_false;
+ priv->consistent_io = consistent_io;
+
+ afr_handle_anon_inode_options(priv, options);
+
+ GF_OPTION_RECONF("use-anonymous-inode", priv->use_anon_inode, options, bool,
+ out);
+ if (priv->shd.enabled) {
+ if ((priv->shd.enabled != enabled_old) ||
+ (timeout_old != priv->shd.timeout))
+ afr_selfheal_childup(this, priv);
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
- priv->did_discovery = _gf_false;
+static int
+afr_pending_xattrs_init(afr_private_t *priv, xlator_t *this)
+{
+ int ret = -1;
+ int i = 0;
+ char *ptr = NULL;
+ char *ptr1 = NULL;
+ char *xattrs_list = NULL;
+ xlator_list_t *trav = NULL;
+ int child_count = -1;
+
+ trav = this->children;
+ child_count = priv->child_count;
+ if (priv->thin_arbiter_count) {
+ /* priv->pending_key[THIN_ARBITER_BRICK_INDEX] is used as the
+ * name of the thin arbiter file for persistence across add/
+ * removal of DHT subvols.*/
+ child_count++;
+ }
+
+ GF_OPTION_INIT("afr-pending-xattr", xattrs_list, str, out);
+ priv->pending_key = GF_CALLOC(sizeof(*priv->pending_key), child_count,
+ gf_afr_mt_char);
+ if (!priv->pending_key) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ if (!xattrs_list) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_NO_CHANGELOG,
+ "Unable to fetch afr-pending-xattr option from volfile."
+ " Falling back to using client translator names. ");
+ while (i < child_count) {
+ ret = gf_asprintf(&priv->pending_key[i], "%s.%s", AFR_XATTR_PREFIX,
+ trav->xlator->name);
+ if (ret == -1) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ trav = trav->next;
+ i++;
+ }
ret = 0;
-out:
- return ret;
+ goto out;
+ }
+
+ ptr = ptr1 = gf_strdup(xattrs_list);
+ if (!ptr) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ for (i = 0, ptr = strtok(ptr, ","); ptr; ptr = strtok(NULL, ",")) {
+ ret = gf_asprintf(&priv->pending_key[i], "%s.%s", AFR_XATTR_PREFIX,
+ ptr);
+ if (ret == -1) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ i++;
+ }
+ ret = 0;
+out:
+ GF_FREE(ptr1);
+ return ret;
}
-
-static const char *favorite_child_warning_str = "You have specified subvolume '%s' "
- "as the 'favorite child'. This means that if a discrepancy in the content "
- "or attributes (ownership, permission, etc.) of a file is detected among "
- "the subvolumes, the file on '%s' will be considered the definitive "
- "version and its contents will OVERWRITE the contents of the file on other "
- "subvolumes. All versions of the file except that on '%s' "
- "WILL BE LOST.";
-
+void
+afr_ta_init(afr_private_t *priv)
+{
+ priv->thin_arbiter_count = 1;
+ priv->child_count--;
+ priv->ta_child_up = 0;
+ priv->ta_bad_child_index = AFR_CHILD_UNKNOWN;
+ priv->ta_notify_dom_lock_offset = 0;
+ priv->ta_in_mem_txn_count = 0;
+ priv->ta_on_wire_txn_count = 0;
+ priv->release_ta_notify_dom_lock = _gf_false;
+ INIT_LIST_HEAD(&priv->ta_waitq);
+ INIT_LIST_HEAD(&priv->ta_onwireq);
+ gf_uuid_clear(priv->ta_gfid);
+}
int32_t
-init (xlator_t *this)
+init(xlator_t *this)
{
- afr_private_t *priv = NULL;
- int child_count = 0;
- xlator_list_t *trav = NULL;
- int i = 0;
- int ret = -1;
- GF_UNUSED int op_errno = 0;
- xlator_t *read_subvol = NULL;
- int read_subvol_index = -1;
- xlator_t *fav_child = NULL;
- char *qtype = NULL;
-
- if (!this->children) {
- gf_log (this->name, GF_LOG_ERROR,
- "replicate translator needs more than one "
- "subvolume defined.");
- return -1;
+ afr_private_t *priv = NULL;
+ int child_count = 0;
+ xlator_list_t *trav = NULL;
+ int i = 0;
+ int ret = -1;
+ GF_UNUSED int op_errno = 0;
+ xlator_t *read_subvol = NULL;
+ int read_subvol_index = -1;
+ char *qtype = NULL;
+ char *fav_child_policy = NULL;
+ char *thin_arbiter = NULL;
+ char *data_self_heal = NULL;
+ char *locking_scheme = NULL;
+ char *data_self_heal_algorithm = NULL;
+
+ if (!this->children) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_CHILD_MISCONFIGURED,
+ "replicate translator needs more than one "
+ "subvolume defined.");
+ return -1;
+ }
+
+ if (!this->parents) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_VOL_MISCONFIGURED,
+ "Volume is dangling.");
+ }
+
+ this->private = GF_CALLOC(1, sizeof(afr_private_t),
+ gf_afr_mt_afr_private_t);
+ if (!this->private)
+ goto out;
+
+ priv = this->private;
+ INIT_LIST_HEAD(&priv->saved_locks);
+ INIT_LIST_HEAD(&priv->lk_healq);
+ LOCK_INIT(&priv->lock);
+
+ child_count = xlator_subvolume_count(this);
+
+ priv->child_count = child_count;
+
+ priv->read_child = -1;
+
+ GF_OPTION_INIT("arbiter-count", priv->arbiter_count, uint32, out);
+ GF_OPTION_INIT("thin-arbiter", thin_arbiter, str, out);
+ if (thin_arbiter && strlen(thin_arbiter) > 0) {
+ afr_ta_init(priv);
+ }
+ INIT_LIST_HEAD(&priv->healing);
+ INIT_LIST_HEAD(&priv->heal_waiting);
+
+ priv->spb_choice_timeout = AFR_DEFAULT_SPB_CHOICE_TIMEOUT;
+
+ GF_OPTION_INIT("afr-dirty-xattr", priv->afr_dirty, str, out);
+
+ GF_OPTION_INIT("metadata-splitbrain-forced-heal",
+ priv->metadata_splitbrain_forced_heal, bool, out);
+
+ GF_OPTION_INIT("read-subvolume", read_subvol, xlator, out);
+ if (read_subvol) {
+ priv->read_child = xlator_subvolume_index(this, read_subvol);
+ if (priv->read_child == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_SUBVOL,
+ "%s not a subvolume", read_subvol->name);
+ goto out;
}
-
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "Volume is dangling.");
+ }
+ GF_OPTION_INIT("read-subvolume-index", read_subvol_index, int32, out);
+ if (read_subvol_index > -1) {
+ if (read_subvol_index >= priv->child_count) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_SUBVOL,
+ "%d not a subvolume-index", read_subvol_index);
+ goto out;
}
+ priv->read_child = read_subvol_index;
+ }
+ GF_OPTION_INIT("choose-local", priv->choose_local, bool, out);
- this->private = GF_CALLOC (1, sizeof (afr_private_t),
- gf_afr_mt_afr_private_t);
- if (!this->private)
- goto out;
+ priv->pending_reads = GF_CALLOC(sizeof(*priv->pending_reads),
+ priv->child_count, gf_afr_mt_atomic_t);
- priv = this->private;
- LOCK_INIT (&priv->lock);
+ GF_OPTION_INIT("read-hash-mode", priv->hash_mode, uint32, out);
- child_count = xlator_subvolume_count (this);
+ priv->favorite_child = -1;
- priv->child_count = child_count;
+ GF_OPTION_INIT("favorite-child-policy", fav_child_policy, str, out);
+ if (afr_set_favorite_child_policy(priv, fav_child_policy) == -1)
+ goto out;
- priv->read_child = -1;
+ GF_OPTION_INIT("shd-max-threads", priv->shd.max_threads, uint32, out);
- GF_OPTION_INIT ("afr-dirty-xattr", priv->afr_dirty, str, out);
+ GF_OPTION_INIT("shd-wait-qlength", priv->shd.wait_qlength, uint32, out);
- GF_OPTION_INIT ("metadata-splitbrain-forced-heal",
- priv->metadata_splitbrain_forced_heal, bool, out);
+ GF_OPTION_INIT("background-self-heal-count",
+ priv->background_self_heal_count, uint32, out);
- GF_OPTION_INIT ("read-subvolume", read_subvol, xlator, out);
- if (read_subvol) {
- priv->read_child = xlator_subvolume_index (this, read_subvol);
- if (priv->read_child == -1) {
- gf_log (this->name, GF_LOG_ERROR, "%s not a subvolume",
- read_subvol->name);
- goto out;
- }
- }
- GF_OPTION_INIT ("read-subvolume-index",read_subvol_index,int32,out);
- if (read_subvol_index > -1) {
- if (read_subvol_index >= priv->child_count) {
- gf_log (this->name, GF_LOG_ERROR, "%d not a subvolume-index",
- read_subvol_index);
- goto out;
- }
- priv->read_child = read_subvol_index;
- }
- GF_OPTION_INIT ("choose-local", priv->choose_local, bool, out);
-
- GF_OPTION_INIT ("read-hash-mode", priv->hash_mode, uint32, out);
-
- priv->favorite_child = -1;
- GF_OPTION_INIT ("favorite-child", fav_child, xlator, out);
- if (fav_child) {
- priv->favorite_child = xlator_subvolume_index (this, fav_child);
- if (priv->favorite_child == -1) {
- gf_log (this->name, GF_LOG_ERROR, "%s not a subvolume",
- fav_child->name);
- goto out;
- }
- gf_log (this->name, GF_LOG_WARNING,
- favorite_child_warning_str, fav_child->name,
- fav_child->name, fav_child->name);
- }
+ GF_OPTION_INIT("heal-wait-queue-length", priv->heal_wait_qlen, uint32, out);
+ GF_OPTION_INIT("data-self-heal", data_self_heal, str, out);
+ if (gf_string2boolean(data_self_heal, &priv->data_self_heal) == -1)
+ goto out;
- GF_OPTION_INIT ("background-self-heal-count",
- priv->background_self_heal_count, uint32, out);
+ GF_OPTION_INIT("data-self-heal-algorithm", data_self_heal_algorithm, str,
+ out);
+ set_data_self_heal_algorithm(priv, data_self_heal_algorithm);
- GF_OPTION_INIT ("data-self-heal", priv->data_self_heal, str, out);
+ GF_OPTION_INIT("data-self-heal-window-size",
+ priv->data_self_heal_window_size, uint32, out);
- GF_OPTION_INIT ("data-self-heal-algorithm",
- priv->data_self_heal_algorithm, str, out);
+ GF_OPTION_INIT("metadata-self-heal", priv->metadata_self_heal, bool, out);
- GF_OPTION_INIT ("data-self-heal-window-size",
- priv->data_self_heal_window_size, uint32, out);
+ GF_OPTION_INIT("entry-self-heal", priv->entry_self_heal, bool, out);
- GF_OPTION_INIT ("metadata-self-heal", priv->metadata_self_heal, bool,
- out);
+ GF_OPTION_INIT("halo-shd-max-latency", priv->shd.halo_max_latency_msec,
+ uint32, out);
- GF_OPTION_INIT ("entry-self-heal", priv->entry_self_heal, bool, out);
+ GF_OPTION_INIT("halo-max-latency", priv->halo_max_latency_msec, uint32,
+ out);
+ GF_OPTION_INIT("halo-max-replicas", priv->halo_max_replicas, uint32, out);
+ GF_OPTION_INIT("halo-min-replicas", priv->halo_min_replicas, uint32, out);
- GF_OPTION_INIT ("data-change-log", priv->data_change_log, bool, out);
+ GF_OPTION_INIT("halo-enabled", priv->halo_enabled, bool, out);
- GF_OPTION_INIT ("metadata-change-log", priv->metadata_change_log, bool,
- out);
+ GF_OPTION_INIT("halo-nfsd-max-latency", priv->nfsd.halo_max_latency_msec,
+ uint32, out);
- GF_OPTION_INIT ("entry-change-log", priv->entry_change_log, bool, out);
+ GF_OPTION_INIT("iam-nfs-daemon", priv->nfsd.iamnfsd, bool, out);
- GF_OPTION_INIT ("optimistic-change-log", priv->optimistic_change_log,
- bool, out);
+ GF_OPTION_INIT("optimistic-change-log", priv->optimistic_change_log, bool,
+ out);
- GF_OPTION_INIT ("inodelk-trace", priv->inodelk_trace, bool, out);
+ GF_OPTION_INIT("pre-op-compat", priv->pre_op_compat, bool, out);
+ GF_OPTION_INIT("locking-scheme", locking_scheme, str, out);
+ priv->granular_locks = (strcmp(locking_scheme, "granular") == 0);
+ GF_OPTION_INIT("full-lock", priv->full_lock, bool, out);
+ GF_OPTION_INIT("granular-entry-heal", priv->esh_granular, bool, out);
- GF_OPTION_INIT ("entrylk-trace", priv->entrylk_trace, bool, out);
+ GF_OPTION_INIT("eager-lock", priv->eager_lock, bool, out);
+ GF_OPTION_INIT("quorum-type", qtype, str, out);
+ GF_OPTION_INIT("quorum-count", priv->quorum_count, uint32, out);
+ GF_OPTION_INIT(AFR_SH_READDIR_SIZE_KEY, priv->sh_readdir_size, size_uint64,
+ out);
+ fix_quorum_options(this, priv, qtype, this->options);
- GF_OPTION_INIT ("pre-op-compat", priv->pre_op_compat, bool, out);
+ GF_OPTION_INIT("post-op-delay-secs", priv->post_op_delay_secs, uint32, out);
+ GF_OPTION_INIT("ensure-durability", priv->ensure_durability, bool, out);
- GF_OPTION_INIT ("eager-lock", priv->eager_lock, bool, out);
- GF_OPTION_INIT ("quorum-type", qtype, str, out);
- GF_OPTION_INIT ("quorum-count", priv->quorum_count, uint32, out);
- GF_OPTION_INIT (AFR_SH_READDIR_SIZE_KEY, priv->sh_readdir_size, size_uint64,
- out);
- fix_quorum_options (this, priv, qtype, this->options);
+ GF_OPTION_INIT("self-heal-daemon", priv->shd.enabled, bool, out);
- GF_OPTION_INIT ("post-op-delay-secs", priv->post_op_delay_secs, uint32, out);
- GF_OPTION_INIT ("ensure-durability", priv->ensure_durability, bool,
- out);
+ GF_OPTION_INIT("iam-self-heal-daemon", priv->shd.iamshd, bool, out);
+ GF_OPTION_INIT("heal-timeout", priv->shd.timeout, int32, out);
- GF_OPTION_INIT ("self-heal-daemon", priv->shd.enabled, bool, out);
+ GF_OPTION_INIT("consistent-metadata", priv->consistent_metadata, bool, out);
+ GF_OPTION_INIT("consistent-io", priv->consistent_io, bool, out);
+ afr_handle_anon_inode_options(priv, this->options);
- GF_OPTION_INIT ("iam-self-heal-daemon", priv->shd.iamshd, bool, out);
- GF_OPTION_INIT ("heal-timeout", priv->shd.timeout, int32, out);
+ GF_OPTION_INIT("use-anonymous-inode", priv->use_anon_inode, bool, out);
+ if (priv->quorum_count != 0)
+ priv->consistent_io = _gf_false;
- GF_OPTION_INIT ("quorum-reads", priv->quorum_reads, bool, out);
- GF_OPTION_INIT ("consistent-metadata", priv->consistent_metadata, bool,
- out);
+ priv->wait_count = 1;
- priv->wait_count = 1;
+ priv->local = GF_CALLOC(sizeof(unsigned char), child_count, gf_afr_mt_char);
+ if (!priv->local) {
+ ret = -ENOMEM;
+ goto out;
+ }
- priv->child_up = GF_CALLOC (sizeof (unsigned char), child_count,
- gf_afr_mt_char);
- if (!priv->child_up) {
- ret = -ENOMEM;
- goto out;
- }
+ priv->anon_inode = GF_CALLOC(sizeof(unsigned char), child_count,
+ gf_afr_mt_char);
- for (i = 0; i < child_count; i++)
- priv->child_up[i] = -1; /* start with unknown state.
- this initialization needed
- for afr_notify() to work
- reliably
- */
+ priv->child_up = GF_CALLOC(sizeof(unsigned char), child_count,
+ gf_afr_mt_char);
- priv->children = GF_CALLOC (sizeof (xlator_t *), child_count,
- gf_afr_mt_xlator_t);
- if (!priv->children) {
- ret = -ENOMEM;
- goto out;
- }
+ priv->child_latency = GF_MALLOC(sizeof(*priv->child_latency) * child_count,
+ gf_afr_mt_child_latency_t);
+ priv->halo_child_up = GF_CALLOC(sizeof(unsigned char), child_count,
+ gf_afr_mt_char);
- priv->pending_key = GF_CALLOC (sizeof (*priv->pending_key),
- child_count,
- gf_afr_mt_char);
- if (!priv->pending_key) {
- ret = -ENOMEM;
- goto out;
+ if (!priv->child_up || !priv->child_latency || !priv->halo_child_up ||
+ !priv->anon_inode) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ /*Initialize to -ve ping timeout so that they are not considered
+ * in child-up events until ping-event comes*/
+ for (i = 0; i < child_count; i++)
+ priv->child_latency[i] = -1;
+
+ priv->children = GF_CALLOC(sizeof(xlator_t *), child_count,
+ gf_afr_mt_xlator_t);
+ if (!priv->children) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = afr_pending_xattrs_init(priv, this);
+ if (ret)
+ goto out;
+
+ trav = this->children;
+ i = 0;
+ while (i < child_count) {
+ priv->children[i] = trav->xlator;
+ trav = trav->next;
+ i++;
+ }
+
+ ret = gf_asprintf(&priv->sh_domain, AFR_SH_DATA_DOMAIN_FMT, this->name);
+ if (-1 == ret) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ priv->last_event = GF_CALLOC(child_count, sizeof(*priv->last_event),
+ gf_afr_mt_int32_t);
+ if (!priv->last_event) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ this->itable = inode_table_new(SHD_INODE_LRU_LIMIT, this);
+ if (!this->itable) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ if (priv->shd.iamshd) {
+ ret = afr_selfheal_daemon_init(this);
+ if (ret) {
+ ret = -ENOMEM;
+ goto out;
}
+ }
- trav = this->children;
- i = 0;
- while (i < child_count) {
- priv->children[i] = trav->xlator;
-
- ret = gf_asprintf (&priv->pending_key[i], "%s.%s",
- AFR_XATTR_PREFIX,
- trav->xlator->name);
- if (-1 == ret) {
- ret = -ENOMEM;
- goto out;
- }
-
- trav = trav->next;
- i++;
- }
+ /* keep more local here as we may need them for self-heal etc */
+ this->local_pool = mem_pool_new(afr_local_t, 512);
+ if (!this->local_pool) {
+ ret = -1;
+ goto out;
+ }
- ret = gf_asprintf (&priv->sh_domain, AFR_SH_DATA_DOMAIN_FMT,
- this->name);
- if (-1 == ret) {
- ret = -ENOMEM;
- goto out;
- }
+ priv->root_inode = NULL;
- priv->last_event = GF_CALLOC (child_count, sizeof (*priv->last_event),
- gf_afr_mt_int32_t);
- if (!priv->last_event) {
- ret = -ENOMEM;
- goto out;
- }
+ ret = 0;
+out:
+ return ret;
+}
+void
+afr_destroy_healer_object(xlator_t *this, struct subvol_healer *healer)
+{
+ int ret = -1;
+
+ if (!healer)
+ return;
+
+ if (healer->running) {
+ /*
+ * If there are any resources to cleanup, We need
+ * to do that gracefully using pthread_cleanup_push
+ */
+ ret = gf_thread_cleanup_xint(healer->thread);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_SELF_HEAL_FAILED,
+ "Failed to clean up healer threads.");
+ healer->thread = 0;
+ }
+ pthread_cond_destroy(&healer->cond);
+ pthread_mutex_destroy(&healer->mutex);
+}
- ret = afr_selfheal_daemon_init (this);
- if (ret) {
- ret = -ENOMEM;
- goto out;
- }
-
- /* keep more local here as we may need them for self-heal etc */
- this->local_pool = mem_pool_new (afr_local_t, 512);
- if (!this->local_pool) {
- ret = -1;
- gf_log (this->name, GF_LOG_ERROR,
- "failed to create local_t's memory pool");
- goto out;
- }
+void
+afr_selfheal_daemon_fini(xlator_t *this)
+{
+ struct subvol_healer *healer = NULL;
+ afr_self_heald_t *shd = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+
+ priv = this->private;
+ if (!priv)
+ return;
+
+ shd = &priv->shd;
+ if (!shd->iamshd)
+ return;
+
+ for (i = 0; i < priv->child_count; i++) {
+ healer = &shd->index_healers[i];
+ afr_destroy_healer_object(this, healer);
+
+ healer = &shd->full_healers[i];
+ afr_destroy_healer_object(this, healer);
+
+ if (shd->statistics[i])
+ eh_destroy(shd->statistics[i]);
+ }
+ GF_FREE(shd->index_healers);
+ GF_FREE(shd->full_healers);
+ GF_FREE(shd->statistics);
+ if (shd->split_brain)
+ eh_destroy(shd->split_brain);
+}
+void
+fini(xlator_t *this)
+{
+ afr_private_t *priv = NULL;
- priv->root_inode = NULL;
+ priv = this->private;
- ret = 0;
-out:
- return ret;
-}
+ afr_selfheal_daemon_fini(this);
+ GF_ASSERT(list_empty(&priv->saved_locks));
+ LOCK(&priv->lock);
+ if (priv->timer != NULL) {
+ gf_timer_call_cancel(this->ctx, priv->timer);
+ priv->timer = NULL;
+ }
+ UNLOCK(&priv->lock);
-int
-fini (xlator_t *this)
-{
- afr_private_t *priv = NULL;
+ if (this->local_pool != NULL) {
+ mem_pool_destroy(this->local_pool);
+ this->local_pool = NULL;
+ }
- priv = this->private;
- this->private = NULL;
- afr_priv_destroy (priv);
- //if (this->itable);//I dont see any destroy func
+ this->private = NULL;
+ afr_priv_destroy(priv);
+ if (this->itable) {
+ inode_table_destroy(this->itable);
+ this->itable = NULL;
+ }
- return 0;
+ return;
}
-
struct xlator_fops fops = {
- .lookup = afr_lookup,
- .open = afr_open,
- .lk = afr_lk,
- .flush = afr_flush,
- .statfs = afr_statfs,
- .fsync = afr_fsync,
- .fsyncdir = afr_fsyncdir,
- .xattrop = afr_xattrop,
- .fxattrop = afr_fxattrop,
- .inodelk = afr_inodelk,
- .finodelk = afr_finodelk,
- .entrylk = afr_entrylk,
- .fentrylk = afr_fentrylk,
-
- /* inode read */
- .access = afr_access,
- .stat = afr_stat,
- .fstat = afr_fstat,
- .readlink = afr_readlink,
- .getxattr = afr_getxattr,
- .fgetxattr = afr_fgetxattr,
- .readv = afr_readv,
-
- /* inode write */
- .writev = afr_writev,
- .truncate = afr_truncate,
- .ftruncate = afr_ftruncate,
- .setxattr = afr_setxattr,
- .fsetxattr = afr_fsetxattr,
- .setattr = afr_setattr,
- .fsetattr = afr_fsetattr,
- .removexattr = afr_removexattr,
- .fremovexattr = afr_fremovexattr,
- .fallocate = afr_fallocate,
- .discard = afr_discard,
- .zerofill = afr_zerofill,
-
- /* dir read */
- .opendir = afr_opendir,
- .readdir = afr_readdir,
- .readdirp = afr_readdirp,
-
- /* dir write */
- .create = afr_create,
- .mknod = afr_mknod,
- .mkdir = afr_mkdir,
- .unlink = afr_unlink,
- .rmdir = afr_rmdir,
- .link = afr_link,
- .symlink = afr_symlink,
- .rename = afr_rename,
+ .lookup = afr_lookup,
+ .lk = afr_lk,
+ .flush = afr_flush,
+ .statfs = afr_statfs,
+ .fsyncdir = afr_fsyncdir,
+ .inodelk = afr_inodelk,
+ .finodelk = afr_finodelk,
+ .entrylk = afr_entrylk,
+ .fentrylk = afr_fentrylk,
+ .ipc = afr_ipc,
+ .lease = afr_lease,
+
+ /* inode read */
+ .access = afr_access,
+ .stat = afr_stat,
+ .fstat = afr_fstat,
+ .readlink = afr_readlink,
+ .getxattr = afr_getxattr,
+ .fgetxattr = afr_fgetxattr,
+ .readv = afr_readv,
+ .seek = afr_seek,
+
+ /* inode write */
+ .writev = afr_writev,
+ .truncate = afr_truncate,
+ .ftruncate = afr_ftruncate,
+ .setxattr = afr_setxattr,
+ .fsetxattr = afr_fsetxattr,
+ .setattr = afr_setattr,
+ .fsetattr = afr_fsetattr,
+ .removexattr = afr_removexattr,
+ .fremovexattr = afr_fremovexattr,
+ .fallocate = afr_fallocate,
+ .discard = afr_discard,
+ .zerofill = afr_zerofill,
+ .xattrop = afr_xattrop,
+ .fxattrop = afr_fxattrop,
+ .fsync = afr_fsync,
+
+ /*inode open*/
+ .opendir = afr_opendir,
+ .open = afr_open,
+
+ /* dir read */
+ .readdir = afr_readdir,
+ .readdirp = afr_readdirp,
+
+ /* dir write */
+ .create = afr_create,
+ .mknod = afr_mknod,
+ .mkdir = afr_mkdir,
+ .unlink = afr_unlink,
+ .rmdir = afr_rmdir,
+ .link = afr_link,
+ .symlink = afr_symlink,
+ .rename = afr_rename,
};
-
struct xlator_dumpops dumpops = {
- .priv = afr_priv_dump,
+ .priv = afr_priv_dump,
};
-
struct xlator_cbks cbks = {
- .release = afr_release,
- .releasedir = afr_releasedir,
- .forget = afr_forget,
+ .release = afr_release,
+ .releasedir = afr_releasedir,
+ .forget = afr_forget,
};
-
struct volume_options options[] = {
- { .key = {"read-subvolume" },
- .type = GF_OPTION_TYPE_XLATOR,
- .description = "inode-read fops happen only on one of the bricks in "
- "replicate. Afr will prefer the one specified using "
- "this option if it is not stale. Option value must be "
- "one of the xlator names of the children. "
- "Ex: <volname>-client-0 till "
- "<volname>-client-<number-of-bricks - 1>"
- },
- { .key = {"read-subvolume-index" },
- .type = GF_OPTION_TYPE_INT,
- .default_value = "-1",
- .description = "inode-read fops happen only on one of the bricks in "
- "replicate. AFR will prefer the one specified using "
- "this option if it is not stale. allowed options"
- " include -1 till replica-count - 1"
- },
- { .key = {"read-hash-mode" },
- .type = GF_OPTION_TYPE_INT,
- .min = 0,
- .max = 2,
- .default_value = "1",
- .description = "inode-read fops happen only on one of the bricks in "
- "replicate. AFR will prefer the one computed using "
- "the method specified using this option"
- "0 = first up server, "
- "1 = hash by GFID of file (all clients use "
- "same subvolume), "
- "2 = hash by GFID of file and client PID",
- },
- { .key = {"choose-local" },
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "true",
- .description = "Choose a local subvolume (i.e. Brick) to read from"
- " if read-subvolume is not explicitly set.",
- },
- { .key = {"favorite-child"},
- .type = GF_OPTION_TYPE_XLATOR,
- .description = "If a split-brain happens choose subvol/brick set by "
- "this option as source."
- },
- { .key = {"background-self-heal-count"},
- .type = GF_OPTION_TYPE_INT,
- .min = 0,
- .default_value = "16",
- .validate = GF_OPT_VALIDATE_MIN,
- .description = "This specifies the number of self-heals that can be "
- " performed in background without blocking the fop"
- },
- { .key = {"data-self-heal"},
- .type = GF_OPTION_TYPE_STR,
- .value = {"1", "on", "yes", "true", "enable",
- "0", "off", "no", "false", "disable",
- "open"},
- .default_value = "on",
- .description = "Using this option we can enable/disable data "
- "self-heal on the file. \"open\" means data "
- "self-heal action will only be triggered by file "
- "open operations."
- },
- { .key = {"data-self-heal-algorithm"},
- .type = GF_OPTION_TYPE_STR,
- .description = "Select between \"full\", \"diff\". The "
- "\"full\" algorithm copies the entire file from "
- "source to sink. The \"diff\" algorithm copies to "
- "sink only those blocks whose checksums don't match "
- "with those of source. If no option is configured "
- "the option is chosen dynamically as follows: "
- "If the file does not exist on one of the sinks "
- "or empty file exists or if the source file size is "
- "about the same as page size the entire file will "
- "be read and written i.e \"full\" algo, "
- "otherwise \"diff\" algo is chosen.",
- .value = { "diff", "full"}
- },
- { .key = {"data-self-heal-window-size"},
- .type = GF_OPTION_TYPE_INT,
- .min = 1,
- .max = 1024,
- .default_value = "1",
- .description = "Maximum number blocks per file for which self-heal "
- "process would be applied simultaneously."
- },
- { .key = {"metadata-self-heal"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "on",
- .description = "Using this option we can enable/disable metadata "
- "i.e. Permissions, ownerships, xattrs self-heal on "
- "the file/directory."
- },
- { .key = {"entry-self-heal"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "on",
- .description = "Using this option we can enable/disable entry "
- "self-heal on the directory."
- },
- { .key = {"data-change-log"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "on",
- .description = "Data fops like write/truncate will not perform "
- "pre/post fop changelog operations in afr transaction "
- "if this option is disabled"
- },
- { .key = {"metadata-change-log"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "on",
- .description = "Metadata fops like setattr/setxattr will not perform "
- "pre/post fop changelog operations in afr transaction "
- "if this option is disabled"
- },
- { .key = {"entry-change-log"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "on",
- .description = "Entry fops like create/unlink will not perform "
- "pre/post fop changelog operations in afr transaction "
- "if this option is disabled"
- },
- { .key = {"optimistic-change-log"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "on",
- .description = "Entry/Metadata fops will not perform "
- "pre fop changelog operations in afr transaction "
- "if this option is enabled."
- },
- { .key = {"inodelk-trace"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
- .description = "Enabling this option logs inode lock/unlocks"
- },
- { .key = {"entrylk-trace"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
- .description = "Enabling this option logs entry lock/unlocks"
- },
- { .key = {"pre-op-compat"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "on",
- .description = "Use separate pre-op xattrop() FOP rather than "
- "overloading xdata of the OP"
- },
- { .key = {"eager-lock"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "on",
- .description = "Lock phase of a transaction has two sub-phases. "
- "First is an attempt to acquire locks in parallel by "
- "broadcasting non-blocking lock requests. If lock "
- "acquisition fails on any server, then the held locks "
- "are unlocked and revert to a blocking locked mode "
- "sequentially on one server after another. If this "
- "option is enabled the initial broadcasting lock "
- "request attempt to acquire lock on the entire file. "
- "If this fails, we revert back to the sequential "
- "\"regional\" blocking lock as before. In the case "
- "where such an \"eager\" lock is granted in the "
- "non-blocking phase, it gives rise to an opportunity "
- "for optimization. i.e, if the next write transaction "
- "on the same FD arrives before the unlock phase of "
- "the first transaction, it \"takes over\" the full "
- "file lock. Similarly if yet another data transaction "
- "arrives before the unlock phase of the \"optimized\" "
- "transaction, that in turn \"takes over\" the lock as "
- "well. The actual unlock now happens at the end of "
- "the last \"optimized\" transaction."
-
- },
- { .key = {"self-heal-daemon"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "on",
- .description = "This option applies to only self-heal-daemon. "
- "Index directory crawl and automatic healing of files "
- "will not be performed if this option is turned off."
- },
- { .key = {"iam-self-heal-daemon"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
- .description = "This option differentiates if the replicate "
- "translator is running as part of self-heal-daemon "
- "or not."
- },
- { .key = {"quorum-type"},
- .type = GF_OPTION_TYPE_STR,
- .value = { "none", "auto", "fixed"},
- .default_value = "none",
- .description = "If value is \"fixed\" only allow writes if "
- "quorum-count bricks are present. If value is "
- "\"auto\" only allow writes if more than half of "
- "bricks, or exactly half including the first, are "
- "present.",
- },
- { .key = {"quorum-count"},
- .type = GF_OPTION_TYPE_INT,
- .min = 1,
- .max = INT_MAX,
- .default_value = 0,
- .description = "If quorum-type is \"fixed\" only allow writes if "
- "this many bricks or present. Other quorum types "
- "will OVERWRITE this value.",
- },
- { .key = {"quorum-reads"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "no",
- .description = "If quorum-reads is \"true\" only allow reads if "
- "quorum is met when quorum is enabled.",
- },
- { .key = {"node-uuid"},
- .type = GF_OPTION_TYPE_STR,
- .description = "Local glusterd uuid string, used in starting "
- "self-heal-daemon so that it can crawl only on "
- "local index directories.",
- },
- { .key = {"post-op-delay-secs"},
- .type = GF_OPTION_TYPE_INT,
- .min = 0,
- .max = INT_MAX,
- .default_value = "1",
- .description = "Time interval induced artificially before "
- "post-operation phase of the transaction to "
- "enhance overlap of adjacent write operations.",
- },
- { .key = {AFR_SH_READDIR_SIZE_KEY},
- .type = GF_OPTION_TYPE_SIZET,
- .description = "readdirp size for performing entry self-heal",
- .min = 1024,
- .max = 131072,
- .default_value = "1KB",
- },
- { .key = {"ensure-durability"},
- .type = GF_OPTION_TYPE_BOOL,
- .description = "Afr performs fsyncs for transactions if this "
- "option is on to make sure the changelogs/data is "
- "written to the disk",
- .default_value = "on",
- },
- { .key = {"afr-dirty-xattr"},
- .type = GF_OPTION_TYPE_STR,
- .default_value = AFR_DIRTY_DEFAULT,
- },
- { .key = {"metadata-splitbrain-forced-heal"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
- },
- { .key = {"heal-timeout"},
- .type = GF_OPTION_TYPE_INT,
- .min = 60,
- .max = INT_MAX,
- .default_value = "600",
- .description = "time interval for checking the need to self-heal "
- "in self-heal-daemon"
- },
- { .key = {"consistent-metadata"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "no",
- .description = "If this option is enabled, readdirp will force "
- "lookups on those entries read whose read child is "
- "not the same as that of the parent. This will "
- "guarantee that all read operations on a file serve "
- "attributes from the same subvol as long as it holds "
- " a good copy of the file/dir.",
- },
- { .key = {NULL} },
+ {.key = {"read-subvolume"},
+ .type = GF_OPTION_TYPE_XLATOR,
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "inode-read fops happen only on one of the bricks in "
+ "replicate. Afr will prefer the one specified using "
+ "this option if it is not stale. Option value must be "
+ "one of the xlator names of the children. "
+ "Ex: <volname>-client-0 till "
+ "<volname>-client-<number-of-bricks - 1>"},
+ {.key = {"read-subvolume-index"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "-1",
+ .op_version = {2},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "inode-read fops happen only on one of the bricks in "
+ "replicate. AFR will prefer the one specified using "
+ "this option if it is not stale. allowed options"
+ " include -1 till replica-count - 1"},
+ {.key = {"read-hash-mode"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .max = 5,
+ .default_value = "1",
+ .op_version = {2},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description =
+ "inode-read fops happen only on one of the bricks in "
+ "replicate. AFR will prefer the one computed using "
+ "the method specified using this option.\n"
+ "0 = first readable child of AFR, starting from 1st child.\n"
+ "1 = hash by GFID of file (all clients use "
+ "same subvolume).\n"
+ "2 = hash by GFID of file and client PID.\n"
+ "3 = brick having the least outstanding read requests.\n"
+ "4 = brick having the least network ping latency.\n"
+ "5 = Hybrid mode between 3 and 4, ie least value among "
+ "network-latency multiplied by outstanding-read-requests."},
+ {
+ .key = {"choose-local"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "true",
+ .op_version = {2},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "Choose a local subvolume (i.e. Brick) to read from"
+ " if read-subvolume is not explicitly set.",
+ },
+ {.key = {"background-self-heal-count"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .max = 256,
+ .default_value = "8",
+ .validate = GF_OPT_VALIDATE_MIN,
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "This specifies the number of per client self-heal "
+ "jobs that can perform parallel heals in the "
+ "background."},
+ {.key = {"halo-shd-max-latency"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 99999,
+ .default_value = "99999",
+ .op_version = {GD_OP_VERSION_3_11_0},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate", "halo"},
+ .description = "Maximum latency for shd halo replication in msec."},
+ {.key = {"halo-enabled"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "False",
+ .op_version = {GD_OP_VERSION_3_11_0},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate", "halo"},
+ .description = "Enable Halo (geo) replication mode."},
+ {.key = {"halo-nfsd-max-latency"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 99999,
+ .default_value = "5",
+ .op_version = {GD_OP_VERSION_3_11_0},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate", "halo"},
+ .description = "Maximum latency for nfsd halo replication in msec."},
+ {.key = {"halo-max-latency"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = AFR_HALO_MAX_LATENCY,
+ .default_value = "5",
+ .op_version = {GD_OP_VERSION_3_11_0},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate", "halo"},
+ .description = "Maximum latency for halo replication in msec."},
+ {.key = {"halo-max-replicas"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 99999,
+ .default_value = "99999",
+ .op_version = {GD_OP_VERSION_3_11_0},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate", "halo"},
+ .description = "The maximum number of halo replicas; replicas"
+ " beyond this value will be written asynchronously"
+ "via the SHD."},
+ {.key = {"halo-min-replicas"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 99999,
+ .default_value = "2",
+ .op_version = {GD_OP_VERSION_3_11_0},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate", "halo"},
+ .description = "The minimmum number of halo replicas, before adding "
+ "out of region replicas."},
+ {.key = {"heal-wait-queue-length"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .max = 10000, /*Around 100MB with sizeof(afr_local_t)= 10496 bytes*/
+ .default_value = "128",
+ .validate = GF_OPT_VALIDATE_MIN,
+ .op_version = {GD_OP_VERSION_3_7_10},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "This specifies the number of heals that can be queued"
+ " for the parallel background self heal jobs."},
+ {.key = {"data-self-heal"},
+ .type = GF_OPTION_TYPE_STR,
+ .value = {"1", "on", "yes", "true", "enable", "0", "off", "no", "false",
+ "disable", "open"},
+ .default_value = "off",
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "Using this option we can enable/disable data "
+ "self-heal on the file. \"open\" means data "
+ "self-heal action will only be triggered by file "
+ "open operations."},
+ {.key = {"data-self-heal-algorithm"},
+ .type = GF_OPTION_TYPE_STR,
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "Select between \"full\", \"diff\". The "
+ "\"full\" algorithm copies the entire file from "
+ "source to sink. The \"diff\" algorithm copies to "
+ "sink only those blocks whose checksums don't match "
+ "with those of source. If no option is configured "
+ "the option is chosen dynamically as follows: "
+ "If the file does not exist on one of the sinks "
+ "or empty file exists or if the source file size is "
+ "about the same as page size the entire file will "
+ "be read and written i.e \"full\" algo, "
+ "otherwise \"diff\" algo is chosen.",
+ .value = {"diff", "full"}},
+ {.key = {"data-self-heal-window-size"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 1024,
+ .default_value = "1",
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "Maximum number blocks per file for which self-heal "
+ "process would be applied simultaneously."},
+ {.key = {"metadata-self-heal"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ /*.validate_fn = validate_replica*/
+ .description = "Using this option we can enable/disable metadata "
+ "i.e. Permissions, ownerships, xattrs self-heal on "
+ "the file/directory."},
+ {.key = {"entry-self-heal"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ /*.validate_fn = validate_replica*/
+ .description = "Using this option we can enable/disable entry "
+ "self-heal on the directory."},
+ {.key = {"data-change-log"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "This option exists only for backward compatibility "
+ "and configuring it doesn't have any effect"},
+ {.key = {"metadata-change-log"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "This option exists only for backward compatibility "
+ "and configuring it doesn't have any effect"},
+ {.key = {"entry-change-log"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "This option exists only for backward compatibility "
+ "and configuring it doesn't have any effect"},
+ {.key = {"optimistic-change-log"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "Entry/Metadata fops will not perform "
+ "pre fop changelog operations in afr transaction "
+ "if this option is enabled."},
+ {.key = {"inodelk-trace"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "Enabling this option logs inode lock/unlocks"},
+ {.key = {"entrylk-trace"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "Enabling this option logs entry lock/unlocks"},
+ {.key = {"pre-op-compat"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "Use separate pre-op xattrop() FOP rather than "
+ "overloading xdata of the OP"},
+ {.key = {"eager-lock"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description =
+ "Enable/Disable eager lock for replica volume. "
+ "Lock phase of a transaction has two sub-phases. "
+ "First is an attempt to acquire locks in parallel by "
+ "broadcasting non-blocking lock requests. If lock "
+ "acquisition fails on any server, then the held locks "
+ "are unlocked and we revert to a blocking locks mode "
+ "sequentially on one server after another. If this "
+ "option is enabled the initial broadcasting lock "
+ "request attempts to acquire a full lock on the entire file. "
+ "If this fails, we revert back to the sequential "
+ "\"regional\" blocking locks as before. In the case "
+ "where such an \"eager\" lock is granted in the "
+ "non-blocking phase, it gives rise to an opportunity "
+ "for optimization. i.e, if the next write transaction "
+ "on the same FD arrives before the unlock phase of "
+ "the first transaction, it \"takes over\" the full "
+ "file lock. Similarly if yet another data transaction "
+ "arrives before the unlock phase of the \"optimized\" "
+ "transaction, that in turn \"takes over\" the lock as "
+ "well. The actual unlock now happens at the end of "
+ "the last \"optimized\" transaction."
+
+ },
+ {.key = {"self-heal-daemon"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE,
+ .tags = {"replicate"},
+ /*.validate_fn = validate_replica_heal_enable_disable*/
+ .description = "This option applies to only self-heal-daemon. "
+ "Index directory crawl and automatic healing of files "
+ "will not be performed if this option is turned off."},
+ {.key = {"iam-self-heal-daemon"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "This option differentiates if the replicate "
+ "translator is running as part of self-heal-daemon "
+ "or not."},
+ {.key = {"iam-nfs-daemon"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "This option differentiates if the replicate "
+ "translator is running as part of an NFS daemon "
+ "or not."},
+ {
+ .key = {"quorum-type"},
+ .type = GF_OPTION_TYPE_STR,
+ .value = {"none", "auto", "fixed"},
+ .default_value = "none",
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ /*.option = quorum-type*/
+ .description = "If value is \"fixed\" only allow writes if "
+ "quorum-count bricks are present. If value is "
+ "\"auto\" only allow writes if more than half of "
+ "bricks, or exactly half including the first, are "
+ "present.",
+ },
+ {
+ .key = {"quorum-count"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = INT_MAX,
+ .default_value = 0,
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ /*.option = quorum-count*/
+ /*.validate_fn = validate_quorum_count*/
+ .description = "If quorum-type is \"fixed\" only allow writes if "
+ "this many bricks are present. Other quorum types "
+ "will OVERWRITE this value.",
+ },
+ {
+ .key = {"quorum-reads"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ .op_version = {GD_OP_VERSION_3_7_0},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "This option has been removed. Reads are not allowed "
+ "if quorum is not met.",
+ },
+ {
+ .key = {"node-uuid"},
+ .type = GF_OPTION_TYPE_STR,
+ .description = "Local glusterd uuid string, used in starting "
+ "self-heal-daemon so that it can crawl only on "
+ "local index directories.",
+ },
+ {
+ .key = {"post-op-delay-secs"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .max = INT_MAX,
+ .default_value = "1",
+ .op_version = {2},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "Time interval induced artificially before "
+ "post-operation phase of the transaction to "
+ "enhance overlap of adjacent write operations.",
+ },
+ {
+ .key = {AFR_SH_READDIR_SIZE_KEY},
+ .type = GF_OPTION_TYPE_SIZET,
+ .description = "readdirp size for performing entry self-heal",
+ .min = 1024,
+ .max = 131072,
+ .op_version = {2},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE,
+ .tags = {"replicate"},
+ .default_value = "1KB",
+ },
+ {
+ .key = {"ensure-durability"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .op_version = {3},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "Afr performs fsyncs for transactions if this "
+ "option is on to make sure the changelogs/data is "
+ "written to the disk",
+ .default_value = "on",
+ },
+ {
+ .key = {"afr-dirty-xattr"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = AFR_DIRTY_DEFAULT,
+ },
+ {.key = {"afr-pending-xattr"},
+ .type = GF_OPTION_TYPE_STR,
+ .description = "Comma separated list of xattrs that are used to "
+ "capture information on pending heals."},
+ {
+ .key = {"metadata-splitbrain-forced-heal"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ },
+ {.key = {"heal-timeout"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 5,
+ .max = INT_MAX,
+ .default_value = "600",
+ .op_version = {2},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "time interval for checking the need to self-heal "
+ "in self-heal-daemon"},
+ {
+ .key = {"consistent-metadata"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ .op_version = {GD_OP_VERSION_3_7_0},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "If this option is enabled, readdirp will force "
+ "lookups on those entries read whose read child is "
+ "not the same as that of the parent. This will "
+ "guarantee that all read operations on a file serve "
+ "attributes from the same subvol as long as it holds "
+ " a good copy of the file/dir.",
+ },
+ {.key = {"arbiter-count"},
+ .type = GF_OPTION_TYPE_INT,
+ .description = "subset of child_count. Has to be 0 or 1."},
+ {
+ .key = {"thin-arbiter"},
+ .type = GF_OPTION_TYPE_STR,
+ .op_version = {GD_OP_VERSION_4_1_0},
+ .flags = OPT_FLAG_SETTABLE,
+ .tags = {"replicate"},
+ .description = "contains host:path of thin abriter brick",
+ },
+ {.key = {"shd-max-threads"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 64,
+ .default_value = "1",
+ .op_version = {GD_OP_VERSION_3_7_12},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "Maximum number of parallel heals SHD can do per "
+ "local brick. This can substantially lower heal times"
+ ", but can also crush your bricks if you don't have "
+ "the storage hardware to support this."},
+ {
+ .key = {"shd-wait-qlength"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 655536,
+ .default_value = "1024",
+ .op_version = {GD_OP_VERSION_3_7_12},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "This option can be used to control number of heals"
+ " that can wait in SHD per subvolume",
+ },
+ {
+ .key = {"locking-scheme"},
+ .type = GF_OPTION_TYPE_STR,
+ .value = {"full", "granular"},
+ .default_value = "full",
+ .op_version = {GD_OP_VERSION_3_7_12},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "If this option is set to granular, self-heal will "
+ "stop being compatible with afr-v1, which helps afr "
+ "be more granular while self-healing",
+ },
+ {.key = {"full-lock"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "yes",
+ .op_version = {GD_OP_VERSION_3_13_2},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE,
+ .tags = {"replicate"},
+ .description = "If this option is disabled, then the IOs will take "
+ "range locks same as versions till 3.13.1."},
+ {
+ .key = {"granular-entry-heal"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ .op_version = {GD_OP_VERSION_3_8_0},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "If this option is enabled, self-heal will resort to "
+ "granular way of recording changelogs and doing entry "
+ "self-heal.",
+ },
+ {
+ .key = {"favorite-child-policy"},
+ .type = GF_OPTION_TYPE_STR,
+ .value = {"none", "size", "ctime", "mtime", "majority"},
+ .default_value = "none",
+ .op_version = {GD_OP_VERSION_3_7_12},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "This option can be used to automatically resolve "
+ "split-brains using various policies without user "
+ "intervention. \"size\" picks the file with the "
+ "biggest size as the source. \"ctime\" and \"mtime\" "
+ "pick the file with the latest ctime and mtime "
+ "respectively as the source. \"majority\" picks a file"
+ " with identical mtime and size in more than half the "
+ "number of bricks in the replica.",
+ },
+ {
+ .key = {"consistent-io"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ .description = "If this option is enabled, i/o will fail even if "
+ "one of the bricks is down in the replicas",
+ },
+ {.key = {"use-compound-fops"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ .op_version = {GD_OP_VERSION_3_8_4},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "This option exists only for backward compatibility "
+ "and configuring it doesn't have any effect"},
+ {.key = {"use-anonymous-inode"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ .op_version = {GD_OP_VERSION_8_0},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE,
+ .tags = {"replicate"},
+ .description = "Setting this option heals directory renames efficiently"},
+
+ {.key = {NULL}},
+};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .notify = notify,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .dumpops = &dumpops,
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "replicate",
+ .category = GF_MAINTAINED,
};
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index de000e765ea..d62f9a9caf2 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -8,161 +8,293 @@
cases as published by the Free Software Foundation.
*/
-
#ifndef __AFR_H__
#define __AFR_H__
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "call-stub.h"
-#include "compat-errno.h"
+#include <glusterfs/call-stub.h>
+#include <glusterfs/compat-errno.h>
#include "afr-mem-types.h"
#include "libxlator.h"
-#include "timer.h"
-#include "syncop.h"
+#include <glusterfs/timer.h>
+#include <glusterfs/syncop.h>
#include "afr-self-heald.h"
#include "afr-messages.h"
-#define AFR_XATTR_PREFIX "trusted.afr"
+#define SHD_INODE_LRU_LIMIT 1
#define AFR_PATHINFO_HEADER "REPLICATE:"
#define AFR_SH_READDIR_SIZE_KEY "self-heal-readdir-size"
#define AFR_SH_DATA_DOMAIN_FMT "%s:self-heal"
#define AFR_DIRTY_DEFAULT AFR_XATTR_PREFIX ".dirty"
-#define AFR_DIRTY (((afr_private_t *) (THIS->private))->afr_dirty)
+#define AFR_DIRTY (((afr_private_t *)(THIS->private))->afr_dirty)
+
+#define AFR_LOCKEE_COUNT_MAX 3
+#define AFR_DOM_COUNT_MAX 3
+#define AFR_NUM_CHANGE_LOGS 3 /*data + metadata + entry*/
+#define AFR_DEFAULT_SPB_CHOICE_TIMEOUT 300 /*in seconds*/
+
+#define ARBITER_BRICK_INDEX 2
+#define THIN_ARBITER_BRICK_INDEX 2
+#define AFR_TA_DOM_NOTIFY "afr.ta.dom-notify"
+#define AFR_TA_DOM_MODIFY "afr.ta.dom-modify"
+
+#define AFR_LK_HEAL_DOM "afr.lock-heal.domain"
+
+#define AFR_HALO_MAX_LATENCY 99999
+#define AFR_ANON_DIR_PREFIX ".glusterfs-anonymous-inode"
+
+#define PFLAG_PENDING (1 << 0)
+#define PFLAG_SBRAIN (1 << 1)
+
+typedef int (*afr_lock_cbk_t)(call_frame_t *frame, xlator_t *this);
+
+typedef int (*afr_read_txn_wind_t)(call_frame_t *frame, xlator_t *this,
+ int subvol);
+
+typedef int (*afr_inode_refresh_cbk_t)(call_frame_t *frame, xlator_t *this,
+ int err);
+
+typedef int (*afr_changelog_resume_t)(call_frame_t *frame, xlator_t *this);
+
+#define AFR_COUNT(array, max) \
+ ({ \
+ int __i; \
+ int __res = 0; \
+ for (__i = 0; __i < max; __i++) \
+ if (array[__i]) \
+ __res++; \
+ __res; \
+ })
+#define AFR_INTERSECT(dst, src1, src2, max) \
+ ({ \
+ int __i; \
+ for (__i = 0; __i < max; __i++) \
+ dst[__i] = src1[__i] && src2[__i]; \
+ })
+#define AFR_CMP(a1, a2, len) \
+ ({ \
+ int __cmp = 0; \
+ int __i; \
+ for (__i = 0; __i < len; __i++) \
+ if (a1[__i] != a2[__i]) { \
+ __cmp = 1; \
+ break; \
+ } \
+ __cmp; \
+ })
+#define AFR_IS_ARBITER_BRICK(priv, index) \
+ ((priv->arbiter_count == 1) && (index == ARBITER_BRICK_INDEX))
+
+#define AFR_SET_ERROR_AND_CHECK_SPLIT_BRAIN(ret, errnum) \
+ do { \
+ local->op_ret = ret; \
+ local->op_errno = errnum; \
+ if (local->op_errno == EIO) \
+ gf_msg(this->name, GF_LOG_ERROR, local->op_errno, \
+ AFR_MSG_SPLIT_BRAIN, \
+ "Failing %s on gfid %s: " \
+ "split-brain observed.", \
+ gf_fop_list[local->op], uuid_utoa(local->inode->gfid)); \
+ } while (0)
+
+#define AFR_ERROR_OUT_IF_FDCTX_INVALID(__fd, __this, __error, __label) \
+ do { \
+ afr_fd_ctx_t *__fd_ctx = NULL; \
+ __fd_ctx = afr_fd_ctx_get(__fd, __this); \
+ if (__fd_ctx && __fd_ctx->is_fd_bad) { \
+ __error = EBADF; \
+ goto __label; \
+ } \
+ } while (0)
-#define AFR_LOCKEE_COUNT_MAX 3
-#define AFR_DOM_COUNT_MAX 3
-#define AFR_NUM_CHANGE_LOGS 3 /*data + metadata + entry*/
+typedef enum {
+ AFR_READ_POLICY_FIRST_UP,
+ AFR_READ_POLICY_GFID_HASH,
+ AFR_READ_POLICY_GFID_PID_HASH,
+ AFR_READ_POLICY_LESS_LOAD,
+ AFR_READ_POLICY_LEAST_LATENCY,
+ AFR_READ_POLICY_LOAD_LATENCY_HYBRID,
+} afr_read_hash_mode_t;
-typedef int (*afr_lock_cbk_t) (call_frame_t *frame, xlator_t *this);
+typedef enum {
+ AFR_FAV_CHILD_NONE,
+ AFR_FAV_CHILD_BY_SIZE,
+ AFR_FAV_CHILD_BY_CTIME,
+ AFR_FAV_CHILD_BY_MTIME,
+ AFR_FAV_CHILD_BY_MAJORITY,
+ AFR_FAV_CHILD_POLICY_MAX,
+} afr_favorite_child_policy;
-typedef int (*afr_read_txn_wind_t) (call_frame_t *frame, xlator_t *this, int subvol);
+typedef enum {
+ AFR_SELFHEAL_DATA_FULL = 0,
+ AFR_SELFHEAL_DATA_DIFF,
+ AFR_SELFHEAL_DATA_DYNAMIC,
+} afr_data_self_heal_type_t;
-typedef int (*afr_inode_refresh_cbk_t) (call_frame_t *frame, xlator_t *this, int err);
+typedef enum {
+ AFR_CHILD_UNKNOWN = -1,
+ AFR_CHILD_ZERO,
+ AFR_CHILD_ONE,
+ AFR_CHILD_THIN_ARBITER,
+} afr_child_index;
-typedef int (*afr_changelog_resume_t) (call_frame_t *frame, xlator_t *this);
+typedef enum {
+ TA_WAIT_FOR_NOTIFY_LOCK_REL, /*FOP came after notify domain lock upcall
+ notification and waiting for its release.*/
+ TA_GET_INFO_FROM_TA_FILE, /*FOP needs post-op on ta file to get
+ *info about which brick is bad.*/
+ TA_INFO_IN_MEMORY_SUCCESS, /*Bad brick info is in memory and fop failed
+ *on BAD brick - Success*/
+ TA_INFO_IN_MEMORY_FAILED, /*Bad brick info is in memory and fop failed
+ *on GOOD brick - Failed*/
+ TA_SUCCESS, /*FOP succeeded on both data bricks.*/
+} afr_ta_fop_state_t;
+
+struct afr_nfsd {
+ uint32_t halo_max_latency_msec;
+ gf_boolean_t iamnfsd;
+};
-#define alloca0(size) ({void *__ptr; __ptr = alloca(size); memset(__ptr, 0, size); __ptr;})
-#define AFR_COUNT(array,max) ({int __i; int __res = 0; for (__i = 0; __i < max; __i++) if (array[__i]) __res++; __res;})
-#define AFR_INTERSECT(dst,src1,src2,max) ({int __i; for (__i = 0; __i < max; __i++) dst[__i] = src1[__i] && src2[__i];})
-#define AFR_CMP(a1,a2,len) ({int __cmp = 0; int __i; for (__i = 0; __i < len; __i++) if (a1[__i] != a2[__i]) { __cmp = 1; break;} __cmp;})
+typedef struct _afr_lk_heal_info {
+ fd_t *fd;
+ int32_t cmd;
+ struct gf_flock flock;
+ dict_t *xdata_req;
+ unsigned char *locked_nodes;
+ struct list_head pos;
+ gf_lkowner_t lk_owner;
+ pid_t pid;
+ int32_t *child_up_event_gen;
+ int32_t *child_down_event_gen;
+} afr_lk_heal_info_t;
typedef struct _afr_private {
- gf_lock_t lock; /* to guard access to child_count, etc */
- unsigned int child_count; /* total number of children */
-
- xlator_t **children;
-
- inode_t *root_inode;
-
- unsigned char *child_up;
-
- char **pending_key;
-
- char *data_self_heal; /* on/off/open */
- char * data_self_heal_algorithm; /* name of algorithm */
- unsigned int data_self_heal_window_size; /* max number of pipelined
- read/writes */
-
- unsigned int background_self_heal_count;
- unsigned int background_self_heals_started;
- gf_boolean_t metadata_self_heal; /* on/off */
- gf_boolean_t entry_self_heal; /* on/off */
-
- gf_boolean_t data_change_log; /* on/off */
- gf_boolean_t metadata_change_log; /* on/off */
- gf_boolean_t entry_change_log; /* on/off */
-
- gf_boolean_t metadata_splitbrain_forced_heal; /* on/off */
- int read_child; /* read-subvolume */
- unsigned int hash_mode; /* for when read_child is not set */
- int favorite_child; /* subvolume to be preferred in resolving
- split-brain cases */
-
- gf_boolean_t inodelk_trace;
- gf_boolean_t entrylk_trace;
-
- unsigned int wait_count; /* # of servers to wait for success */
-
- uint64_t up_count; /* number of CHILD_UPs we have seen */
- uint64_t down_count; /* number of CHILD_DOWNs we have seen */
-
- gf_boolean_t optimistic_change_log;
- gf_boolean_t eager_lock;
- gf_boolean_t pre_op_compat; /* on/off */
- uint32_t post_op_delay_secs;
- unsigned int quorum_count;
- gf_boolean_t quorum_reads;
-
- char vol_uuid[UUID_SIZE + 1];
- int32_t *last_event;
-
- /* @event_generation: Keeps count of number of events received which can
- potentially impact consistency decisions. The events are CHILD_UP
- and CHILD_DOWN, when we have to recalculate the freshness/staleness
- of copies to detect if changes had happened while the other server
- was down. CHILD_DOWN and CHILD_UP can also be received on network
- disconnect/reconnects and not necessarily server going down/up.
- Recalculating freshness/staleness on network events is equally
- important as we might have had a network split brain.
- */
- uint32_t event_generation;
-
- gf_boolean_t choose_local;
- gf_boolean_t did_discovery;
- uint64_t sh_readdir_size;
- gf_boolean_t ensure_durability;
- char *sh_domain;
- char *afr_dirty;
-
- afr_self_heald_t shd;
-
- /* pump dependencies */
- void *pump_private;
- gf_boolean_t use_afr_in_pump;
- gf_boolean_t consistent_metadata;
+ gf_lock_t lock; /* to guard access to child_count, etc */
+ unsigned int child_count; /* total number of children */
+ unsigned int arbiter_count; /*subset of child_count.
+ Has to be 0 or 1.*/
+
+ xlator_t **children;
+
+ inode_t *root_inode;
+
+ int favorite_child; /* subvolume to be preferred in resolving
+ split-brain cases */
+ /* For thin-arbiter. */
+ uuid_t ta_gfid;
+ unsigned int thin_arbiter_count; /* 0 or 1 at the moment.*/
+ int ta_bad_child_index;
+ int ta_event_gen;
+ unsigned int ta_in_mem_txn_count;
+ unsigned int ta_on_wire_txn_count;
+ struct list_head ta_waitq;
+ struct list_head ta_onwireq;
+
+ unsigned char *anon_inode;
+ unsigned char *child_up;
+ unsigned char *halo_child_up;
+ int64_t *child_latency;
+ unsigned char *local;
+
+ char **pending_key;
+
+ afr_data_self_heal_type_t data_self_heal_algorithm;
+ unsigned int data_self_heal_window_size; /* max number of pipelined
+ read/writes */
+
+ struct list_head heal_waiting; /*queue for files that need heal*/
+ uint32_t heal_wait_qlen; /*configurable queue length for heal_waiting*/
+ int32_t heal_waiters; /* No. of elements currently in wait queue.*/
+
+ struct list_head healing; /* queue for files that are undergoing
+ background heal*/
+ uint32_t background_self_heal_count; /*configurable queue length for
+ healing queue*/
+ int32_t healers; /* No. of elements currently undergoing background
+ heal*/
+
+ gf_boolean_t release_ta_notify_dom_lock;
+
+ gf_boolean_t metadata_self_heal; /* on/off */
+ gf_boolean_t entry_self_heal; /* on/off */
+
+ gf_boolean_t metadata_splitbrain_forced_heal; /* on/off */
+ int read_child; /* read-subvolume */
+ gf_atomic_t *pending_reads; /*No. of pending read cbks per child.*/
+
+ gf_timer_t *timer; /* launched when parent up is received */
+
+ unsigned int wait_count; /* # of servers to wait for success */
+
+ unsigned char ta_child_up;
+ gf_boolean_t optimistic_change_log;
+ gf_boolean_t eager_lock;
+ gf_boolean_t pre_op_compat; /* on/off */
+ uint32_t post_op_delay_secs;
+ unsigned int quorum_count;
+
+ off_t ta_notify_dom_lock_offset;
+ afr_favorite_child_policy fav_child_policy; /*Policy to use for automatic
+ resolution of split-brains.*/
+ afr_read_hash_mode_t hash_mode; /* for when read_child is not set */
+
+ int32_t *last_event;
+
+ /* @event_generation: Keeps count of number of events received which can
+ potentially impact consistency decisions. The events are CHILD_UP
+ and CHILD_DOWN, when we have to recalculate the freshness/staleness
+ of copies to detect if changes had happened while the other server
+ was down. CHILD_DOWN and CHILD_UP can also be received on network
+ disconnect/reconnects and not necessarily server going down/up.
+ Recalculating freshness/staleness on network events is equally
+ important as we might have had a network split brain.
+ */
+ uint32_t event_generation;
+ char vol_uuid[UUID_SIZE + 1];
+
+ gf_boolean_t choose_local;
+ gf_boolean_t did_discovery;
+ gf_boolean_t ensure_durability;
+ gf_boolean_t halo_enabled;
+ gf_boolean_t consistent_metadata;
+ gf_boolean_t need_heal;
+ gf_boolean_t granular_locks;
+ uint64_t sh_readdir_size;
+ char *sh_domain;
+ char *afr_dirty;
+
+ uint64_t spb_choice_timeout;
+
+ afr_self_heald_t shd;
+ struct afr_nfsd nfsd;
+
+ uint32_t halo_max_latency_msec;
+ uint32_t halo_max_replicas;
+ uint32_t halo_min_replicas;
+
+ gf_boolean_t full_lock;
+ gf_boolean_t esh_granular;
+ gf_boolean_t consistent_io;
+ gf_boolean_t data_self_heal; /* on/off */
+ gf_boolean_t use_anon_inode;
+
+ /*For lock healing.*/
+ struct list_head saved_locks;
+ struct list_head lk_healq;
+
+ /*For anon-inode handling */
+ char anon_inode_name[NAME_MAX + 1];
+ char anon_gfid_str[UUID_SIZE + 1];
} afr_private_t;
-
typedef enum {
- AFR_DATA_TRANSACTION, /* truncate, write, ... */
- AFR_METADATA_TRANSACTION, /* chmod, chown, ... */
- AFR_ENTRY_TRANSACTION, /* create, rmdir, ... */
- AFR_ENTRY_RENAME_TRANSACTION, /* rename */
+ AFR_DATA_TRANSACTION, /* truncate, write, ... */
+ AFR_METADATA_TRANSACTION, /* chmod, chown, ... */
+ AFR_ENTRY_TRANSACTION, /* create, rmdir, ... */
+ AFR_ENTRY_RENAME_TRANSACTION, /* rename */
} afr_transaction_type;
-typedef enum {
- AFR_TRANSACTION_LK,
- AFR_SELFHEAL_LK,
-} transaction_lk_type_t;
-
-typedef enum {
- AFR_LOCK_OP,
- AFR_UNLOCK_OP,
-} afr_lock_op_type_t;
-
-typedef enum {
- AFR_DATA_SELF_HEAL_LK,
- AFR_METADATA_SELF_HEAL_LK,
- AFR_ENTRY_SELF_HEAL_LK,
-}selfheal_lk_type_t;
-
-typedef enum {
- AFR_INODELK_TRANSACTION,
- AFR_INODELK_NB_TRANSACTION,
- AFR_ENTRYLK_TRANSACTION,
- AFR_ENTRYLK_NB_TRANSACTION,
- AFR_INODELK_SELFHEAL,
- AFR_INODELK_NB_SELFHEAL,
- AFR_ENTRYLK_SELFHEAL,
- AFR_ENTRYLK_NB_SELFHEAL,
-} afr_lock_call_type_t;
-
/*
xattr format: trusted.afr.volume = [x y z]
x - data pending
@@ -171,821 +303,940 @@ typedef enum {
*/
static inline int
-afr_index_for_transaction_type (afr_transaction_type type)
+afr_index_for_transaction_type(afr_transaction_type type)
{
- switch (type) {
-
+ switch (type) {
case AFR_DATA_TRANSACTION:
- return 0;
+ return 0;
case AFR_METADATA_TRANSACTION:
- return 1;
+ return 1;
case AFR_ENTRY_TRANSACTION:
case AFR_ENTRY_RENAME_TRANSACTION:
- return 2;
- }
+ return 2;
+ }
- return -1; /* make gcc happy */
+ return -1; /* make gcc happy */
}
static inline int
-afr_index_from_ia_type (ia_type_t type)
+afr_index_from_ia_type(ia_type_t type)
{
- switch (type) {
+ switch (type) {
case IA_IFDIR:
- return afr_index_for_transaction_type (AFR_ENTRY_TRANSACTION);
+ return afr_index_for_transaction_type(AFR_ENTRY_TRANSACTION);
case IA_IFREG:
- return afr_index_for_transaction_type (AFR_DATA_TRANSACTION);
- default: return -1;
- }
+ return afr_index_for_transaction_type(AFR_DATA_TRANSACTION);
+ default:
+ return -1;
+ }
}
typedef struct {
- loc_t loc;
- char *basename;
- unsigned char *locked_nodes;
- int locked_count;
+ struct gf_flock flock;
+ loc_t loc;
+ fd_t *fd;
+ char *basename;
+ unsigned char *locked_nodes;
+ int locked_count;
-} afr_entry_lockee_t;
+} afr_lockee_t;
int
-afr_entry_lockee_cmp (const void *l1, const void *l2);
+afr_entry_lockee_cmp(const void *l1, const void *l2);
typedef struct {
- char *domain; /* Domain on which inodelk is taken */
- struct gf_flock flock;
- unsigned char *locked_nodes;
- int32_t lock_count;
-} afr_inodelk_t;
+ loc_t *lk_loc;
-typedef struct {
- loc_t *lk_loc;
-
- int lockee_count;
- afr_entry_lockee_t lockee[AFR_LOCKEE_COUNT_MAX];
+ afr_lockee_t lockee[AFR_LOCKEE_COUNT_MAX];
- afr_inodelk_t inodelk[AFR_DOM_COUNT_MAX];
- const char *lk_basename;
- const char *lower_basename;
- const char *higher_basename;
- char lower_locked;
- char higher_locked;
+ const char *lk_basename;
+ const char *lower_basename;
+ const char *higher_basename;
- unsigned char *locked_nodes;
- unsigned char *lower_locked_nodes;
+ unsigned char *lower_locked_nodes;
- selfheal_lk_type_t selfheal_lk_type;
- transaction_lk_type_t transaction_lk_type;
+ afr_lock_cbk_t lock_cbk;
- int32_t lock_count;
- int32_t entrylk_lock_count;
+ int lockee_count;
- uint64_t lock_number;
- int32_t lk_call_count;
- int32_t lk_expected_count;
- int32_t lk_attempted_count;
+ int32_t lk_call_count;
+ int32_t lk_expected_count;
+ int32_t lk_attempted_count;
- int32_t lock_op_ret;
- int32_t lock_op_errno;
- afr_lock_cbk_t lock_cbk;
- char *domain; /* Domain on which inode/entry lock/unlock in progress.*/
+ int32_t lock_op_ret;
+ int32_t lock_op_errno;
+ char *domain; /* Domain on which inode/entry lock/unlock in progress.*/
+ int32_t lock_count;
+ char lower_locked;
+ char higher_locked;
} afr_internal_lock_t;
struct afr_reply {
- int valid;
- int32_t op_ret;
- int32_t op_errno;
- dict_t *xdata;
- struct iatt poststat;
- struct iatt postparent;
- struct iatt prestat;
- struct iatt preparent;
- struct iatt preparent2;
- struct iatt postparent2;
- uint8_t checksum[MD5_DIGEST_LENGTH];
+ int valid;
+ int32_t op_ret;
+ dict_t *xattr; /*For xattrop*/
+ dict_t *xdata;
+ struct iatt poststat;
+ struct iatt postparent;
+ struct iatt prestat;
+ struct iatt preparent;
+ struct iatt preparent2;
+ struct iatt postparent2;
+ int32_t op_errno;
+ /* For rchecksum */
+ uint8_t checksum[SHA256_DIGEST_LENGTH];
+ gf_boolean_t buf_has_zeroes;
+ gf_boolean_t fips_mode_rchecksum;
+ /* For lookup */
+ int8_t need_heal;
};
typedef enum {
- AFR_FD_NOT_OPENED,
- AFR_FD_OPENED,
- AFR_FD_OPENING
+ AFR_FD_NOT_OPENED,
+ AFR_FD_OPENED,
+ AFR_FD_OPENING
} afr_fd_open_status_t;
typedef struct {
- unsigned int *pre_op_done[AFR_NUM_CHANGE_LOGS];
- int inherited[AFR_NUM_CHANGE_LOGS];
- int on_disk[AFR_NUM_CHANGE_LOGS];
- afr_fd_open_status_t *opened_on; /* which subvolumes the fd is open on */
-
- unsigned int *lock_piggyback;
- unsigned int *lock_acquired;
-
- int flags;
-
- /* used for delayed-post-op optimization */
- pthread_mutex_t delay_lock;
- gf_timer_t *delay_timer;
- call_frame_t *delay_frame;
-
- /* set if any write on this fd was a non stable write
- (i.e, without O_SYNC or O_DSYNC)
- */
- gf_boolean_t witnessed_unstable_write;
-
- /* @open_fd_count:
- Number of open FDs queried from the server, as queried through
- xdata in FOPs. Currently, used to decide if eager-locking must be
- temporarily disabled.
- */
- uint32_t open_fd_count;
-
-
- /* list of frames currently in progress */
- struct list_head eager_locked;
-
- /* the subvolume on which the latest sequence of readdirs (starting
- at offset 0) has begun. Till the next readdir request with 0 offset
- arrives, we continue to read off this subvol.
- */
- int readdir_subvol;
+ afr_fd_open_status_t *opened_on; /* which subvolumes the fd is open on */
+ int flags;
+
+ /* the subvolume on which the latest sequence of readdirs (starting
+ at offset 0) has begun. Till the next readdir request with 0 offset
+ arrives, we continue to read off this subvol.
+ */
+ int readdir_subvol;
+ /* lock-healing related members. */
+ gf_boolean_t is_fd_bad;
+ afr_lk_heal_info_t *lk_heal_info;
+
} afr_fd_ctx_t;
+typedef enum {
+ AFR_FOP_LOCK_PARALLEL,
+ AFR_FOP_LOCK_SERIAL,
+ AFR_FOP_LOCK_QUORUM_FAILED,
+} afr_fop_lock_state_t;
+
+typedef struct _afr_inode_lock_t {
+ /* @num_inodelks:
+ Number of inodelks queried from the server, as queried through
+ xdata in FOPs. Currently, used to decide if eager-locking must be
+ temporarily disabled.
+ */
+ int32_t num_inodelks;
+ unsigned int event_generation;
+ gf_timer_t *delay_timer;
+ struct list_head owners; /*Transactions that are performing fop*/
+ struct list_head post_op; /*Transactions that are done with the fop
+ *So can not conflict with the fops*/
+ struct list_head waiting; /*Transaction that are waiting for
+ *conflicting transactions to complete*/
+ struct list_head frozen; /*Transactions that need to go as part of
+ * next batch of eager-lock*/
+ gf_boolean_t release;
+ gf_boolean_t acquired;
+} afr_lock_t;
+
+typedef struct _afr_inode_ctx {
+ uint64_t read_subvol;
+ uint64_t write_subvol;
+ int lock_count;
+ int spb_choice;
+ gf_timer_t *timer;
+ unsigned int *pre_op_done[AFR_NUM_CHANGE_LOGS];
+ int inherited[AFR_NUM_CHANGE_LOGS];
+ int on_disk[AFR_NUM_CHANGE_LOGS];
+ /*Only 2 types of transactions support eager-locks now. DATA/METADATA*/
+ afr_lock_t lock[2];
+
+ /* @open_fd_count:
+ Number of open FDs queried from the server, as queried through
+ xdata in FOPs. Currently, used to decide if eager-locking must be
+ temporarily disabled.
+ */
+ uint32_t open_fd_count;
+ gf_boolean_t need_refresh;
+
+ /* set if any write on this fd was a non stable write
+ (i.e, without O_SYNC or O_DSYNC)
+ */
+ gf_boolean_t witnessed_unstable_write;
+} afr_inode_ctx_t;
typedef struct _afr_local {
- glusterfs_fop_t op;
- unsigned int call_count;
+ glusterfs_fop_t op;
+ unsigned int call_count;
- /* @event_generation: copy of priv->event_generation taken at the
- time of starting the transaction. The copy is made so that we
- have a stable value through the various phases of the transaction.
- */
- unsigned int event_generation;
+ /* @event_generation: copy of priv->event_generation taken at the
+ time of starting the transaction. The copy is made so that we
+ have a stable value through the various phases of the transaction.
+ */
+ unsigned int event_generation;
- uint32_t open_fd_count;
- gf_boolean_t update_open_fd_count;
+ uint32_t open_fd_count;
+ int32_t num_inodelks;
- gf_lkowner_t saved_lk_owner;
+ int32_t op_ret;
+ int32_t op_errno;
- int32_t op_ret;
- int32_t op_errno;
+ int dirty[AFR_NUM_CHANGE_LOGS];
- int32_t **pending;
+ int32_t **pending;
- int dirty[AFR_NUM_CHANGE_LOGS];
+ loc_t loc;
+ loc_t newloc;
- loc_t loc;
- loc_t newloc;
+ fd_t *fd;
+ afr_fd_ctx_t *fd_ctx;
- fd_t *fd;
- afr_fd_ctx_t *fd_ctx;
+ /* @child_up: copy of priv->child_up taken at the time of transaction
+ start. The copy is taken so that we have a stable child_up array
+ through the phases of the transaction as priv->child_up[i] can keep
+ changing through time.
+ */
+ unsigned char *child_up;
- /* @child_up: copy of priv->child_up taken at the time of transaction
- start. The copy is taken so that we have a stable child_up array
- through the phases of the transaction as priv->child_up[i] can keep
- changing through time.
- */
- unsigned char *child_up;
+ /* @read_attempted:
+ array of flags representing subvolumes where read operations of
+ the read transaction have already been attempted. The array is
+ first pre-filled with down subvolumes, and as reads are performed
+ on other subvolumes, those are set as well. This way if the read
+ operation fails we do not retry on that subvolume again.
+ */
+ unsigned char *read_attempted;
- /* @read_attempted:
- array of flags representing subvolumes where read operations of
- the read transaction have already been attempted. The array is
- first pre-filled with down subvolumes, and as reads are performed
- on other subvolumes, those are set as well. This way if the read
- operation fails we do not retry on that subvolume again.
- */
- unsigned char *read_attempted;
+ /* @readfn:
- /* @readfn:
+ pointer to function which will perform the read operation on a given
+ subvolume. Used in read transactions.
+ */
- pointer to function which will perform the read operation on a given
- subvolume. Used in read transactions.
- */
+ afr_read_txn_wind_t readfn;
- afr_read_txn_wind_t readfn;
+ /* @inode:
- /* @refreshed:
+ the inode on which the read txn is performed on. ref'ed and copied
+ from either fd->inode or loc.inode
+ */
- the inode was "refreshed" (i.e, pending xattrs from all subvols
- freshly inspected and inode ctx updated accordingly) as part of
- this transaction already.
- */
- gf_boolean_t refreshed;
+ inode_t *inode;
- /* @inode:
+ /* @parent[2]:
- the inode on which the read txn is performed on. ref'ed and copied
- from either fd->inode or loc.inode
- */
+ parent inode[s] on which directory transactions are performed.
+ */
- inode_t *inode;
+ inode_t *parent;
+ inode_t *parent2;
- /* @parent[2]:
+ /* @readable:
- parent inode[s] on which directory transactions are performed.
- */
+ array of flags representing servers from which a read can be
+ performed. This is the output of afr_inode_refresh()
+ */
+ unsigned char *readable;
+ unsigned char *readable2; /*For rename transaction*/
- inode_t *parent;
- inode_t *parent2;
+ afr_inode_refresh_cbk_t refreshfn;
- /* @readable:
+ /* @refreshinode:
- array of flags representing servers from which a read can be
- performed. This is the output of afr_inode_refresh()
- */
- unsigned char *readable;
+ Inode currently getting refreshed.
+ */
+ inode_t *refreshinode;
- afr_inode_refresh_cbk_t refreshfn;
+ dict_t *xattr_req;
- /* @refreshinode:
+ dict_t *dict;
- Inode currently getting refreshed.
- */
- inode_t *refreshinode;
+ int read_subvol; /* Current read subvolume */
- /*
- @pre_op_compat:
+ int optimistic_change_log;
- compatibility mode of pre-op. send a separate pre-op and
- op operations as part of transaction, rather than combining
- */
+ afr_internal_lock_t internal_lock;
- gf_boolean_t pre_op_compat;
+ /*To handle setattr/setxattr on yet to be linked inode from dht*/
+ uuid_t refreshgfid;
- dict_t *xattr_req;
+ /* @refreshed:
- afr_internal_lock_t internal_lock;
+ the inode was "refreshed" (i.e, pending xattrs from all subvols
+ freshly inspected and inode ctx updated accordingly) as part of
+ this transaction already.
+ */
+ gf_boolean_t refreshed;
- dict_t *dict;
+ gf_boolean_t update_num_inodelks;
+ gf_boolean_t update_open_fd_count;
- int optimistic_change_log;
- gf_boolean_t delayed_post_op;
+ /*
+ @pre_op_compat:
- /* Is the current writev() going to perform a stable write?
- i.e, is fd->flags or @flags writev param have O_SYNC or
- O_DSYNC?
- */
- gf_boolean_t stable_write;
+ compatibility mode of pre-op. send a separate pre-op and
+ op operations as part of transaction, rather than combining
+ */
- /* This write appended to the file. Nnot necessarily O_APPEND,
- just means the offset of write was at the end of file.
- */
- gf_boolean_t append_write;
+ gf_boolean_t pre_op_compat;
- /*
- This struct contains the arguments for the "continuation"
- (scheme-like) of fops
- */
+ /* Is the current writev() going to perform a stable write?
+ i.e, is fd->flags or @flags writev param have O_SYNC or
+ O_DSYNC?
+ */
+ gf_boolean_t stable_write;
+
+ /* This write appended to the file. Nnot necessarily O_APPEND,
+ just means the offset of write was at the end of file.
+ */
+ gf_boolean_t append_write;
+
+ /*
+ This struct contains the arguments for the "continuation"
+ (scheme-like) of fops
+ */
+
+ struct {
+ struct {
+ struct statvfs buf;
+ unsigned char buf_set;
+ } statfs;
+
+ struct {
+ fd_t *fd;
+ int32_t flags;
+ } open;
+
+ struct {
+ struct gf_flock user_flock;
+ struct gf_flock ret_flock;
+ unsigned char *locked_nodes;
+ int32_t cmd;
+ /*For lock healing only.*/
+ unsigned char *dom_locked_nodes;
+ int32_t *dom_lock_op_ret;
+ int32_t *dom_lock_op_errno;
+ struct gf_flock *getlk_rsp;
+ } lk;
+
+ /* inode read */
+
+ struct {
+ int32_t mask;
+ int last_index; /* index of the child we tried previously */
+ } access;
+
+ struct {
+ int last_index;
+ } stat;
+
+ struct {
+ int last_index;
+ } fstat;
+
+ struct {
+ size_t size;
+ int last_index;
+ } readlink;
+
+ struct {
+ char *name;
+ long xattr_len;
+ int last_index;
+ } getxattr;
+
+ struct {
+ size_t size;
+ off_t offset;
+ int last_index;
+ uint32_t flags;
+ } readv;
+
+ /* dir read */
struct {
- struct {
- gf_boolean_t needs_fresh_lookup;
- uuid_t gfid_req;
- } lookup;
-
- struct {
- unsigned char buf_set;
- struct statvfs buf;
- } statfs;
-
- struct {
- int32_t flags;
- } open;
-
- struct {
- int32_t cmd;
- struct gf_flock user_flock;
- struct gf_flock ret_flock;
- unsigned char *locked_nodes;
- } lk;
-
- /* inode read */
-
- struct {
- int32_t mask;
- int last_index; /* index of the child we tried previously */
- } access;
-
- struct {
- int last_index;
- } stat;
-
- struct {
- int last_index;
- } fstat;
-
- struct {
- size_t size;
- int last_index;
- } readlink;
-
- struct {
- char *name;
- int last_index;
- long xattr_len;
- } getxattr;
-
- struct {
- size_t size;
- off_t offset;
- int last_index;
- uint32_t flags;
- } readv;
-
- /* dir read */
-
- struct {
- int success_count;
- int32_t op_ret;
- int32_t op_errno;
-
- uint32_t *checksum;
- } opendir;
-
- struct {
- int32_t op_ret;
- int32_t op_errno;
- size_t size;
- off_t offset;
- dict_t *dict;
- gf_boolean_t failed;
- int last_index;
- } readdir;
- /* inode write */
-
- struct {
- struct iatt prebuf;
- struct iatt postbuf;
- } inode_wfop; //common structure for all inode-write-fops
-
- struct {
- int32_t op_ret;
-
- struct iovec *vector;
- struct iobref *iobref;
- int32_t count;
- off_t offset;
- uint32_t flags;
- } writev;
-
- struct {
- off_t offset;
- } truncate;
-
- struct {
- off_t offset;
- } ftruncate;
-
- struct {
- struct iatt in_buf;
- int32_t valid;
- } setattr;
-
- struct {
- struct iatt in_buf;
- int32_t valid;
- } fsetattr;
-
- struct {
- dict_t *dict;
- int32_t flags;
- } setxattr;
-
- struct {
- dict_t *dict;
- int32_t flags;
- } fsetxattr;
-
- struct {
- char *name;
- } removexattr;
-
- struct {
- dict_t *xattr;
- } xattrop;
-
- struct {
- dict_t *xattr;
- } fxattrop;
-
- /* dir write */
-
- struct {
- inode_t *inode;
- struct iatt buf;
- struct iatt preparent;
- struct iatt postparent;
- struct iatt prenewparent;
- struct iatt postnewparent;
- } dir_fop; //common structure for all dir fops
-
- struct {
- fd_t *fd;
- dict_t *params;
- int32_t flags;
- mode_t mode;
- } create;
-
- struct {
- dev_t dev;
- mode_t mode;
- dict_t *params;
- } mknod;
-
- struct {
- int32_t mode;
- dict_t *params;
- } mkdir;
-
- struct {
- int flags;
- } rmdir;
-
- struct {
- dict_t *params;
- char *linkpath;
- } symlink;
-
- struct {
- int32_t mode;
- off_t offset;
- size_t len;
- } fallocate;
-
- struct {
- off_t offset;
- size_t len;
- } discard;
-
- struct {
- off_t offset;
- off_t len;
- struct iatt prebuf;
- struct iatt postbuf;
- } zerofill;
-
- struct {
- char *volume;
- int32_t cmd;
- struct gf_flock flock;
- } inodelk;
-
- } cont;
+ uint32_t *checksum;
+ int success_count;
+ int32_t op_ret;
+ int32_t op_errno;
+ } opendir;
struct {
- off_t start, len;
+ int32_t op_ret;
+ int32_t op_errno;
+ size_t size;
+ off_t offset;
+ dict_t *dict;
+ int last_index;
+ gf_boolean_t failed;
+ } readdir;
+ /* inode write */
- gf_boolean_t eager_lock_on;
- int *eager_lock;
+ struct {
+ struct iatt prebuf;
+ struct iatt postbuf;
+ } inode_wfop; // common structure for all inode-write-fops
+
+ struct {
+ struct iovec *vector;
+ struct iobref *iobref;
+ off_t offset;
+ int32_t op_ret;
+ int32_t count;
+ uint32_t flags;
+ } writev;
+
+ struct {
+ off_t offset;
+ } truncate;
+
+ struct {
+ off_t offset;
+ } ftruncate;
+
+ struct {
+ struct iatt in_buf;
+ int32_t valid;
+ } setattr;
+
+ struct {
+ struct iatt in_buf;
+ int32_t valid;
+ } fsetattr;
+
+ struct {
+ dict_t *dict;
+ int32_t flags;
+ } setxattr;
+
+ struct {
+ dict_t *dict;
+ int32_t flags;
+ } fsetxattr;
+
+ struct {
+ char *name;
+ } removexattr;
+
+ struct {
+ dict_t *xattr;
+ gf_xattrop_flags_t optype;
+ } xattrop;
+
+ /* dir write */
+
+ struct {
+ inode_t *inode;
+ struct iatt buf;
+ struct iatt preparent;
+ struct iatt postparent;
+ struct iatt prenewparent;
+ struct iatt postnewparent;
+ } dir_fop; // common structure for all dir fops
+
+ struct {
+ fd_t *fd;
+ dict_t *params;
+ int32_t flags;
+ mode_t mode;
+ } create;
- char *basename;
- char *new_basename;
+ struct {
+ dict_t *params;
+ dev_t dev;
+ mode_t mode;
+ } mknod;
+
+ struct {
+ dict_t *params;
+ int32_t mode;
+ } mkdir;
+
+ struct {
+ dict_t *params;
+ char *linkpath;
+ } symlink;
+
+ struct {
+ off_t offset;
+ size_t len;
+ int32_t mode;
+ } fallocate;
+
+ struct {
+ off_t offset;
+ size_t len;
+ } discard;
- loc_t parent_loc;
- loc_t new_parent_loc;
+ struct {
+ off_t offset;
+ off_t len;
+ struct iatt prebuf;
+ struct iatt postbuf;
+ } zerofill;
+
+ struct {
+ char *volume;
+ int32_t cmd;
+ int32_t in_cmd;
+ struct gf_flock in_flock;
+ struct gf_flock flock;
+ void *xdata;
+ } inodelk;
- afr_transaction_type type;
+ struct {
+ char *volume;
+ char *basename;
+ void *xdata;
+ entrylk_cmd in_cmd;
+ entrylk_cmd cmd;
+ entrylk_type type;
+ } entrylk;
- /* stub to resume on destruction
- of the transaction frame */
- call_stub_t *resume_stub;
+ struct {
+ off_t offset;
+ gf_seek_what_t what;
+ } seek;
- struct list_head eager_locked;
+ struct {
+ struct gf_lease user_lease;
+ struct gf_lease ret_lease;
+ unsigned char *locked_nodes;
+ } lease;
- unsigned char *pre_op;
+ struct {
+ int flags;
+ } rmdir;
- /* @fop_subvols: subvolumes on which FOP will be attempted */
- unsigned char *fop_subvols;
+ struct {
+ int32_t datasync;
+ } fsync;
- /* @failed_subvols: subvolumes on which FOP failed. Always
- a subset of @fop_subvols */
- unsigned char *failed_subvols;
+ struct {
+ uuid_t gfid_req;
+ gf_boolean_t needs_fresh_lookup;
+ } lookup;
- /* @dirtied: flag which indicates whether we set dirty flag
- in the OP. Typically true when we are performing operation
- on more than one subvol and optimistic changelog is disabled
+ } cont;
- A 'true' value set in @dirtied flag means an 'undirtying'
- has to be done in POST-OP phase.
- */
- gf_boolean_t dirtied;
+ struct {
+ char *basename;
+ char *new_basename;
- /* @inherited: flag which indicates that the dirty flags
- of the previous transaction were inherited
- */
- gf_boolean_t inherited;
+ loc_t parent_loc;
+ loc_t new_parent_loc;
- /*
- @no_uninherit: flag which indicates that a pre_op_uninherit()
- must _not_ be attempted (and returned as failure) always. This
- flag is set when a hard pre-op is performed, but not accounted
- for it in fd_ctx->on_disk[]. Such transactions are "isolated"
- from the pre-op piggybacking entirely and therefore uninherit
- must not be attempted.
- */
- gf_boolean_t no_uninherit;
+ /* stub to resume on destruction
+ of the transaction frame */
+ call_stub_t *resume_stub;
- /* @uninherit_done:
- @uninherit_value:
+ struct list_head owner_list;
+ struct list_head wait_list;
- The above pair variables make pre_op_uninherit() idempotent.
- Both are FALSE initially. The first call to pre_op_uninherit
- sets @uninherit_done to TRUE and the return value to
- @uninherit_value. Further calls will check for @uninherit_done
- to be TRUE and if so will simply return @uninherit_value.
- */
- gf_boolean_t uninherit_done;
- gf_boolean_t uninherit_value;
+ unsigned char *pre_op;
- /* @changelog_resume: function to be called after changlogging
- (either pre-op or post-op) is done
- */
+ /* Changelog xattr dict for [f]xattrop*/
+ dict_t **changelog_xdata;
+ unsigned char *pre_op_sources;
- afr_changelog_resume_t changelog_resume;
+ /* @failed_subvols: subvolumes on which a pre-op or a
+ FOP failed. */
+ unsigned char *failed_subvols;
- call_frame_t *main_frame;
+ call_frame_t *main_frame; /*Fop frame*/
+ call_frame_t *frame; /*Transaction frame*/
- int (*wind) (call_frame_t *frame, xlator_t *this, int subvol);
+ int (*wind)(call_frame_t *frame, xlator_t *this, int subvol);
- int (*fop) (call_frame_t *frame, xlator_t *this);
+ int (*unwind)(call_frame_t *frame, xlator_t *this);
- int (*done) (call_frame_t *frame, xlator_t *this);
+ off_t start, len;
- int (*resume) (call_frame_t *frame, xlator_t *this);
+ afr_transaction_type type;
- int (*unwind) (call_frame_t *frame, xlator_t *this);
+ int32_t in_flight_sb_errno; /* This is where the cause of the
+ failure on the last good copy of
+ the file is stored.
+ */
- /* post-op hook */
- } transaction;
+ /* @changelog_resume: function to be called after changlogging
+ (either pre-op or post-op) is done
+ */
+ afr_changelog_resume_t changelog_resume;
- syncbarrier_t barrier;
+ gf_boolean_t eager_lock_on;
+ gf_boolean_t do_eager_unlock;
- /* extra data for fops */
- dict_t *xdata_req;
- dict_t *xdata_rsp;
+ /* @dirtied: flag which indicates whether we set dirty flag
+ in the OP. Typically true when we are performing operation
+ on more than one subvol and optimistic changelog is disabled
- mode_t umask;
- int xflag;
- gf_boolean_t do_discovery;
- struct afr_reply *replies;
-} afr_local_t;
+ A 'true' value set in @dirtied flag means an 'undirtying'
+ has to be done in POST-OP phase.
+ */
+ gf_boolean_t dirtied;
+ /* @inherited: flag which indicates that the dirty flags
+ of the previous transaction were inherited
+ */
+ gf_boolean_t inherited;
-typedef struct _afr_inode_ctx {
- uint64_t read_subvol;
- int spb_choice;
-} afr_inode_ctx_t;
+ /*
+ @no_uninherit: flag which indicates that a pre_op_uninherit()
+ must _not_ be attempted (and returned as failure) always. This
+ flag is set when a hard pre-op is performed, but not accounted
+ for it in fd_ctx->on_disk[]. Such transactions are "isolated"
+ from the pre-op piggybacking entirely and therefore uninherit
+ must not be attempted.
+ */
+ gf_boolean_t no_uninherit;
+
+ gf_boolean_t in_flight_sb; /* Indicator for occurrence of
+ split-brain while in the middle of
+ a txn. */
+
+ /* @uninherit_done:
+ @uninherit_value:
+
+ The above pair variables make pre_op_uninherit() idempotent.
+ Both are FALSE initially. The first call to pre_op_uninherit
+ sets @uninherit_done to TRUE and the return value to
+ @uninherit_value. Further calls will check for @uninherit_done
+ to be TRUE and if so will simply return @uninherit_value.
+ */
+ gf_boolean_t uninherit_done;
+ gf_boolean_t uninherit_value;
+
+ gf_boolean_t disable_delayed_post_op;
+ } transaction;
+
+ syncbarrier_t barrier;
+
+ /* extra data for fops */
+ dict_t *xdata_req;
+ dict_t *xdata_rsp;
+
+ dict_t *xattr_rsp; /*for [f]xattrop*/
+
+ mode_t umask;
+ int xflag;
+ struct afr_reply *replies;
+
+ /* For client side background heals. */
+ struct list_head healer;
+ call_frame_t *heal_frame;
+
+ afr_inode_ctx_t *inode_ctx;
+
+ /*For thin-arbiter transactions.*/
+ int ta_failed_subvol;
+ int ta_event_gen;
+ struct list_head ta_waitq;
+ struct list_head ta_onwireq;
+ afr_ta_fop_state_t fop_state;
+ afr_fop_lock_state_t fop_lock_state;
+ gf_lkowner_t saved_lk_owner;
+ unsigned char read_txn_query_child;
+ unsigned char ta_child_up;
+ gf_boolean_t do_discovery;
+ gf_boolean_t need_full_crawl;
+ gf_boolean_t is_read_txn;
+ gf_boolean_t is_new_entry;
+} afr_local_t;
-/* did a call fail due to a child failing? */
-#define child_went_down(op_ret, op_errno) (((op_ret) < 0) && \
- ((op_errno == ENOTCONN) || \
- (op_errno == EBADFD)))
+typedef struct afr_spbc_timeout {
+ call_frame_t *frame;
+ loc_t *loc;
+ int spb_child_index;
+ gf_boolean_t d_spb;
+ gf_boolean_t m_spb;
+} afr_spbc_timeout_t;
+
+typedef struct afr_spb_status {
+ call_frame_t *frame;
+ loc_t *loc;
+} afr_spb_status_t;
+
+typedef struct afr_empty_brick_args {
+ call_frame_t *frame;
+ char *op_type;
+ loc_t loc;
+ int empty_index;
+} afr_empty_brick_args_t;
+
+typedef struct afr_read_subvol_args {
+ ia_type_t ia_type;
+ uuid_t gfid;
+} afr_read_subvol_args_t;
+
+typedef struct afr_granular_esh_args {
+ fd_t *heal_fd;
+ xlator_t *xl;
+ call_frame_t *frame;
+ gf_boolean_t mismatch; /* flag to represent occurrence of type/gfid
+ mismatch */
+} afr_granular_esh_args_t;
int
-afr_inode_read_subvol_get (inode_t *inode, xlator_t *this,
- unsigned char *data_subvols,
- unsigned char *metadata_subvols,
- int *event_generation);
+afr_inode_get_readable(call_frame_t *frame, inode_t *inode, xlator_t *this,
+ unsigned char *readable, int *event_p, int type);
+int
+afr_inode_read_subvol_get(inode_t *inode, xlator_t *this,
+ unsigned char *data_subvols,
+ unsigned char *metadata_subvols,
+ int *event_generation);
int
-__afr_inode_read_subvol_get (inode_t *inode, xlator_t *this,
- unsigned char *data_subvols,
- unsigned char *metadata_subvols,
- int *event_generation);
+__afr_inode_read_subvol_get(inode_t *inode, xlator_t *this,
+ unsigned char *data_subvols,
+ unsigned char *metadata_subvols,
+ int *event_generation);
int
-__afr_inode_read_subvol_set (inode_t *inode, xlator_t *this,
- unsigned char *data_subvols,
- unsigned char *metadata_subvol,
- int event_generation);
+__afr_inode_read_subvol_set(inode_t *inode, xlator_t *this,
+ unsigned char *data_subvols,
+ unsigned char *metadata_subvol,
+ int event_generation);
+int
+afr_inode_read_subvol_set(inode_t *inode, xlator_t *this,
+ unsigned char *data_subvols,
+ unsigned char *metadata_subvols,
+ int event_generation);
+
int
-afr_inode_read_subvol_set (inode_t *inode, xlator_t *this,
- unsigned char *data_subvols,
- unsigned char *metadata_subvols,
- int event_generation);
+__afr_inode_need_refresh_set(inode_t *inode, xlator_t *this);
int
-afr_inode_read_subvol_reset (inode_t *inode, xlator_t *this);
+afr_inode_need_refresh_set(inode_t *inode, xlator_t *this);
int
-afr_read_subvol_select_by_policy (inode_t *inode, xlator_t *this,
- unsigned char *readable);
+afr_read_subvol_select_by_policy(inode_t *inode, xlator_t *this,
+ unsigned char *readable,
+ afr_read_subvol_args_t *args);
int
-afr_inode_read_subvol_type_get (inode_t *inode, xlator_t *this,
- unsigned char *readable, int *event_p,
- int type);
+afr_inode_read_subvol_type_get(inode_t *inode, xlator_t *this,
+ unsigned char *readable, int *event_p, int type);
int
-afr_read_subvol_get (inode_t *inode, xlator_t *this, int *subvol_p,
- int *event_p, afr_transaction_type type);
+afr_read_subvol_get(inode_t *inode, xlator_t *this, int *subvol_p,
+ unsigned char *readables, int *event_p,
+ afr_transaction_type type, afr_read_subvol_args_t *args);
-#define afr_data_subvol_get(i, t, s, e) \
- afr_read_subvol_get(i, t, s, e, AFR_DATA_TRANSACTION)
+#define afr_data_subvol_get(i, t, s, r, e, a) \
+ afr_read_subvol_get(i, t, s, r, e, AFR_DATA_TRANSACTION, a)
-#define afr_metadata_subvol_get(i, t, s, e) \
- afr_read_subvol_get(i, t, s, e, AFR_METADATA_TRANSACTION)
+#define afr_metadata_subvol_get(i, t, s, r, e, a) \
+ afr_read_subvol_get(i, t, s, r, e, AFR_METADATA_TRANSACTION, a)
int
-afr_inode_refresh (call_frame_t *frame, xlator_t *this, inode_t *inode,
- afr_inode_refresh_cbk_t cbk);
+afr_inode_refresh(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ uuid_t gfid, afr_inode_refresh_cbk_t cbk);
int32_t
-afr_notify (xlator_t *this, int32_t event, void *data, void *data2);
+afr_notify(xlator_t *this, int32_t event, void *data, void *data2);
+
+int
+xattr_is_equal(dict_t *this, char *key1, data_t *value1, void *data);
int
-xattr_is_equal (dict_t *this, char *key1, data_t *value1, void *data);
+afr_add_entry_lockee(afr_local_t *local, loc_t *loc, char *basename,
+ int child_count);
int
-afr_init_entry_lockee (afr_entry_lockee_t *lockee, afr_local_t *local,
- loc_t *loc, char *basename, int child_count);
+afr_add_inode_lockee(afr_local_t *local, int child_count);
void
-afr_entry_lockee_cleanup (afr_internal_lock_t *int_lock);
+afr_lockees_cleanup(afr_internal_lock_t *int_lock);
int
-afr_attempt_lock_recovery (xlator_t *this, int32_t child_index);
+afr_attempt_lock_recovery(xlator_t *this, int32_t child_index);
int
-afr_mark_locked_nodes (xlator_t *this, fd_t *fd,
- unsigned char *locked_nodes);
+afr_mark_locked_nodes(xlator_t *this, fd_t *fd, unsigned char *locked_nodes);
void
-afr_set_lk_owner (call_frame_t *frame, xlator_t *this, void *lk_owner);
+afr_set_lk_owner(call_frame_t *frame, xlator_t *this, void *lk_owner);
int
-afr_set_lock_number (call_frame_t *frame, xlator_t *this);
+afr_set_lock_number(call_frame_t *frame, xlator_t *this);
int32_t
-afr_unlock (call_frame_t *frame, xlator_t *this);
+afr_unlock(call_frame_t *frame, xlator_t *this);
int
-afr_nonblocking_entrylk (call_frame_t *frame, xlator_t *this);
+afr_lock_nonblocking(call_frame_t *frame, xlator_t *this);
int
-afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this);
+afr_blocking_lock(call_frame_t *frame, xlator_t *this);
int
-afr_blocking_lock (call_frame_t *frame, xlator_t *this);
+afr_internal_lock_finish(call_frame_t *frame, xlator_t *this);
int
-afr_internal_lock_finish (call_frame_t *frame, xlator_t *this);
-
-int
-afr_lk_transfer_datalock (call_frame_t *dst, call_frame_t *src, char *dom,
- unsigned int child_count);
-
-int
-__afr_fd_ctx_set (xlator_t *this, fd_t *fd);
-
-int
-afr_fd_ctx_set (xlator_t *this, fd_t *fd);
+__afr_fd_ctx_set(xlator_t *this, fd_t *fd);
afr_fd_ctx_t *
-afr_fd_ctx_get (fd_t *fd, xlator_t *this);
+afr_fd_ctx_get(fd_t *fd, xlator_t *this);
int
-afr_build_parent_loc (loc_t *parent, loc_t *child, int32_t *op_errno);
+afr_build_parent_loc(loc_t *parent, loc_t *child, int32_t *op_errno);
int
-afr_locked_nodes_count (unsigned char *locked_nodes, int child_count);
+afr_locked_nodes_count(unsigned char *locked_nodes, int child_count);
int
-afr_replies_interpret (call_frame_t *frame, xlator_t *this, inode_t *inode);
+afr_replies_interpret(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ gf_boolean_t *start_heal);
void
-afr_local_replies_wipe (afr_local_t *local, afr_private_t *priv);
+afr_local_replies_wipe(afr_local_t *local, afr_private_t *priv);
void
-afr_local_cleanup (afr_local_t *local, xlator_t *this);
+afr_local_cleanup(afr_local_t *local, xlator_t *this);
int
-afr_frame_return (call_frame_t *frame);
+afr_frame_return(call_frame_t *frame);
int
-afr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, dict_t *xdata);
+afr_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata);
void
-afr_local_transaction_cleanup (afr_local_t *local, xlator_t *this);
+afr_local_transaction_cleanup(afr_local_t *local, xlator_t *this);
int
-afr_cleanup_fd_ctx (xlator_t *this, fd_t *fd);
-
-#define AFR_STACK_UNWIND(fop, frame, params ...) \
- do { \
- afr_local_t *__local = NULL; \
- xlator_t *__this = NULL; \
- if (frame) { \
- __local = frame->local; \
- __this = frame->this; \
- frame->local = NULL; \
- } \
- STACK_UNWIND_STRICT (fop, frame, params); \
- if (__local) { \
- afr_local_cleanup (__local, __this); \
- mem_put (__local); \
- } \
- } while (0)
-
-#define AFR_STACK_DESTROY(frame) \
- do { \
- afr_local_t *__local = NULL; \
- xlator_t *__this = NULL; \
- __local = frame->local; \
- __this = frame->this; \
- frame->local = NULL; \
- STACK_DESTROY (frame->root); \
- if (__local) { \
- afr_local_cleanup (__local, __this); \
- mem_put (__local); \
- } \
- } while (0);
-
-#define AFR_FRAME_INIT(frame, op_errno) \
- ({frame->local = mem_get0 (THIS->local_pool); \
- if (afr_local_init (frame->local, THIS->private, &op_errno)) { \
- afr_local_cleanup (frame->local, THIS); \
- mem_put (frame->local); \
- frame->local = NULL; }; \
- frame->local;})
-
-#define AFR_STACK_RESET(frame) \
- do { \
- afr_local_t *__local = NULL; \
- xlator_t *__this = NULL; \
- __local = frame->local; \
- __this = frame->this; \
- frame->local = NULL; \
- int __opr; \
- STACK_RESET (frame->root); \
- if (__local) { \
- afr_local_cleanup (__local, __this); \
- mem_put (__local); \
- } \
- AFR_FRAME_INIT (frame, __opr); \
- } while (0)
+afr_cleanup_fd_ctx(xlator_t *this, fd_t *fd);
+
+#define AFR_STACK_UNWIND(fop, frame, op_ret, op_errno, params...) \
+ do { \
+ afr_local_t *__local = NULL; \
+ xlator_t *__this = NULL; \
+ int32_t __op_ret = 0; \
+ int32_t __op_errno = 0; \
+ \
+ __op_ret = op_ret; \
+ __op_errno = op_errno; \
+ if (frame) { \
+ __local = frame->local; \
+ __this = frame->this; \
+ afr_handle_inconsistent_fop(frame, &__op_ret, &__op_errno); \
+ if (__local && __local->is_read_txn) \
+ afr_pending_read_decrement(__this->private, \
+ __local->read_subvol); \
+ if (__local && __local->xdata_req && \
+ afr_is_lock_mode_mandatory(__local->xdata_req)) \
+ afr_dom_lock_release(frame); \
+ frame->local = NULL; \
+ } \
+ \
+ STACK_UNWIND_STRICT(fop, frame, __op_ret, __op_errno, params); \
+ if (__local) { \
+ afr_local_cleanup(__local, __this); \
+ mem_put(__local); \
+ } \
+ } while (0)
+
+#define AFR_STACK_DESTROY(frame) \
+ do { \
+ afr_local_t *__local = NULL; \
+ xlator_t *__this = NULL; \
+ __local = frame->local; \
+ __this = frame->this; \
+ frame->local = NULL; \
+ STACK_DESTROY(frame->root); \
+ if (__local) { \
+ afr_local_cleanup(__local, __this); \
+ mem_put(__local); \
+ } \
+ } while (0);
+
+#define AFR_FRAME_INIT(frame, op_errno) \
+ ({ \
+ frame->local = mem_get0(THIS->local_pool); \
+ if (afr_local_init(frame->local, frame->this->private, &op_errno)) { \
+ afr_local_cleanup(frame->local, frame->this); \
+ mem_put(frame->local); \
+ frame->local = NULL; \
+ }; \
+ frame->local; \
+ })
+
+#define AFR_STACK_RESET(frame) \
+ do { \
+ afr_local_t *__local = NULL; \
+ xlator_t *__this = NULL; \
+ __local = frame->local; \
+ __this = frame->this; \
+ frame->local = NULL; \
+ int __opr; \
+ STACK_RESET(frame->root); \
+ if (__local) { \
+ afr_local_cleanup(__local, __this); \
+ mem_put(__local); \
+ } \
+ AFR_FRAME_INIT(frame, __opr); \
+ } while (0)
/* allocate and return a string that is the basename of argument */
static inline char *
-AFR_BASENAME (const char *str)
+AFR_BASENAME(const char *str)
{
- char *__tmp_str = NULL;
- char *__basename_str = NULL;
- __tmp_str = gf_strdup (str);
- __basename_str = gf_strdup (basename (__tmp_str));
- GF_FREE (__tmp_str);
- return __basename_str;
+ char *__tmp_str = NULL;
+ char *__basename_str = NULL;
+ __tmp_str = gf_strdup(str);
+ __basename_str = gf_strdup(basename(__tmp_str));
+ GF_FREE(__tmp_str);
+ return __basename_str;
}
call_frame_t *
-afr_copy_frame (call_frame_t *base);
+afr_copy_frame(call_frame_t *base);
int
-afr_transaction_local_init (afr_local_t *local, xlator_t *this);
+afr_transaction_local_init(afr_local_t *local, xlator_t *this);
int32_t
-afr_marker_getxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name,afr_local_t *local, afr_private_t *priv );
+afr_marker_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, afr_local_t *local, afr_private_t *priv);
int
-afr_local_init (afr_local_t *local, afr_private_t *priv, int32_t *op_errno);
+afr_local_init(afr_local_t *local, afr_private_t *priv, int32_t *op_errno);
int
-afr_internal_lock_init (afr_internal_lock_t *lk, size_t child_count,
- transaction_lk_type_t lk_type);
+afr_internal_lock_init(afr_internal_lock_t *lk, size_t child_count);
int
-afr_higher_errno (int32_t old_errno, int32_t new_errno);
+afr_higher_errno(int32_t old_errno, int32_t new_errno);
int
-afr_final_errno (afr_local_t *local, afr_private_t *priv);
+afr_final_errno(afr_local_t *local, afr_private_t *priv);
int
-afr_xattr_req_prepare (xlator_t *this, dict_t *xattr_req);
+afr_xattr_req_prepare(xlator_t *this, dict_t *xattr_req);
void
-afr_fix_open (fd_t *fd, xlator_t *this);
+afr_fix_open(fd_t *fd, xlator_t *this);
afr_fd_ctx_t *
-afr_fd_ctx_get (fd_t *fd, xlator_t *this);
+afr_fd_ctx_get(fd_t *fd, xlator_t *this);
void
-afr_set_low_priority (call_frame_t *frame);
+afr_set_low_priority(call_frame_t *frame);
int
-afr_child_fd_ctx_set (xlator_t *this, fd_t *fd, int32_t child,
- int flags);
+afr_child_fd_ctx_set(xlator_t *this, fd_t *fd, int32_t child, int flags);
void
-afr_matrix_cleanup (int32_t **pending, unsigned int m);
+afr_matrix_cleanup(int32_t **pending, unsigned int m);
-int32_t**
-afr_matrix_create (unsigned int m, unsigned int n);
+int32_t **
+afr_matrix_create(unsigned int m, unsigned int n);
-int**
-afr_mark_pending_changelog (afr_private_t *priv, unsigned char *pending,
- dict_t *xattr, ia_type_t iat);
+int **
+afr_mark_pending_changelog(afr_private_t *priv, unsigned char *pending,
+ dict_t *xattr, ia_type_t iat);
void
-afr_filter_xattrs (dict_t *xattr);
+afr_filter_xattrs(dict_t *xattr);
/*
* Special value indicating we should use the "auto" quorum method instead of
@@ -994,48 +1245,179 @@ afr_filter_xattrs (dict_t *xattr);
#define AFR_QUORUM_AUTO INT_MAX
int
-afr_fd_report_unstable_write (xlator_t *this, fd_t *fd);
+afr_fd_report_unstable_write(xlator_t *this, afr_local_t *local);
+
+gf_boolean_t
+afr_fd_has_witnessed_unstable_write(xlator_t *this, inode_t *inode);
+
+void
+afr_reply_wipe(struct afr_reply *reply);
+
+void
+afr_replies_wipe(struct afr_reply *replies, int count);
+
+gf_boolean_t
+afr_xattrs_are_equal(dict_t *dict1, dict_t *dict2);
+
+gf_boolean_t
+afr_is_xattr_ignorable(char *key);
+
+int
+afr_get_heal_info(call_frame_t *frame, xlator_t *this, loc_t *loc);
+
+int
+afr_heal_splitbrain_file(call_frame_t *frame, xlator_t *this, loc_t *loc);
+
+int
+afr_get_split_brain_status(void *opaque);
+
+int
+afr_get_split_brain_status_cbk(int ret, call_frame_t *frame, void *opaque);
+
+int
+afr_inode_split_brain_choice_set(inode_t *inode, xlator_t *this,
+ int spb_choice);
+int
+afr_split_brain_read_subvol_get(inode_t *inode, xlator_t *this,
+ call_frame_t *frame, int *spb_subvol);
+int
+afr_get_child_index_from_name(xlator_t *this, char *name);
+
+int
+afr_is_split_brain(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ uuid_t gfid, gf_boolean_t *d_spb, gf_boolean_t *m_spb);
+int
+afr_spb_choice_timeout_cancel(xlator_t *this, inode_t *inode);
+
+int
+afr_set_split_brain_choice(int ret, call_frame_t *frame, void *opaque);
gf_boolean_t
-afr_fd_has_witnessed_unstable_write (xlator_t *this, fd_t *fd);
+afr_get_need_heal(xlator_t *this);
void
-afr_delayed_changelog_wake_resume (xlator_t *this, fd_t *fd, call_stub_t *stub);
+afr_set_need_heal(xlator_t *this, afr_local_t *local);
+
+int
+afr_selfheal_data_open(xlator_t *this, inode_t *inode, fd_t **fd);
int
-afr_inodelk_init (afr_inodelk_t *lk, char *dom, size_t child_count);
+afr_get_msg_id(char *op_type);
+
+int
+afr_set_in_flight_sb_status(xlator_t *this, call_frame_t *frame,
+ inode_t *inode);
+
+int32_t
+afr_quorum_errno(afr_private_t *priv);
+
+gf_boolean_t
+afr_is_consistent_io_possible(afr_local_t *local, afr_private_t *priv,
+ int32_t *op_errno);
+void
+afr_handle_inconsistent_fop(call_frame_t *frame, int32_t *op_ret,
+ int32_t *op_errno);
+
+void
+afr_inode_write_fill(call_frame_t *frame, xlator_t *this, int child_index,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
+void
+afr_process_post_writev(call_frame_t *frame, xlator_t *this);
void
-afr_handle_open_fd_count (call_frame_t *frame, xlator_t *this);
+afr_writev_unwind(call_frame_t *frame, xlator_t *this);
void
-afr_remove_eager_lock_stub (afr_local_t *local);
+afr_writev_copy_outvars(call_frame_t *src_frame, call_frame_t *dst_frame);
void
-afr_replies_wipe (struct afr_reply *replies, int count);
+afr_update_uninodelk(afr_local_t *local, afr_internal_lock_t *int_lock,
+ int32_t child_index);
+afr_fd_ctx_t *
+__afr_fd_ctx_get(fd_t *fd, xlator_t *this);
gf_boolean_t
-afr_xattrs_are_equal (dict_t *dict1, dict_t *dict2);
+afr_is_inode_refresh_reqd(inode_t *inode, xlator_t *this, int event_gen1,
+ int event_gen2);
+int
+afr_serialize_xattrs_with_delimiter(call_frame_t *frame, xlator_t *this,
+ char *buf, const char *default_str,
+ int32_t *serz_len, char delimiter);
gf_boolean_t
-afr_is_xattr_ignorable (char *key);
+afr_is_symmetric_error(call_frame_t *frame, xlator_t *this);
int
-afr_get_heal_info (call_frame_t *frame, xlator_t *this, loc_t *loc,
- dict_t *xdata);
+__afr_inode_ctx_get(xlator_t *this, inode_t *inode, afr_inode_ctx_t **ctx);
+
+uint64_t
+afr_write_subvol_get(call_frame_t *frame, xlator_t *this);
int
-afr_heal_splitbrain_file(call_frame_t *frame, xlator_t *this, loc_t *loc);
+afr_write_subvol_set(call_frame_t *frame, xlator_t *this);
+
+int
+afr_write_subvol_reset(call_frame_t *frame, xlator_t *this);
+
+int
+afr_set_inode_local(xlator_t *this, afr_local_t *local, inode_t *inode);
int
-afr_get_split_brain_status (call_frame_t *frame, xlator_t *this, loc_t *loc);
+afr_fill_ta_loc(xlator_t *this, loc_t *loc, gf_boolean_t is_gfid_based_fop);
int
-afr_inode_split_brain_choice_set (inode_t *inode, xlator_t *this,
- int spb_choice);
+afr_ta_post_op_lock(xlator_t *this, loc_t *loc);
+
int
-afr_inode_split_brain_choice_get (inode_t *inode, xlator_t *this,
- int *spb_choice);
+afr_ta_post_op_unlock(xlator_t *this, loc_t *loc);
+
+gf_boolean_t
+afr_is_pending_set(xlator_t *this, dict_t *xdata, int type);
+
int
-afr_get_child_index_from_name (xlator_t *this, char *name);
+__afr_get_up_children_count(afr_private_t *priv);
+
+call_frame_t *
+afr_ta_frame_create(xlator_t *this);
+
+gf_boolean_t
+afr_ta_has_quorum(afr_private_t *priv, afr_local_t *local);
+
+void
+afr_ta_lock_release_synctask(xlator_t *this);
+
+void
+afr_ta_locked_priv_invalidate(afr_private_t *priv);
+
+gf_boolean_t
+afr_lookup_has_quorum(call_frame_t *frame,
+ const unsigned int up_children_count);
+
+void
+afr_mark_new_entry_changelog(call_frame_t *frame, xlator_t *this);
+
+void
+afr_handle_replies_quorum(call_frame_t *frame, xlator_t *this);
+
+gf_boolean_t
+afr_ta_dict_contains_pending_xattr(dict_t *dict, afr_private_t *priv,
+ int child);
+
+void
+afr_selfheal_childup(xlator_t *this, afr_private_t *priv);
+
+gf_boolean_t
+afr_is_lock_mode_mandatory(dict_t *xdata);
+
+void
+afr_dom_lock_release(call_frame_t *frame);
+
+void
+afr_fill_success_replies(afr_local_t *local, afr_private_t *priv,
+ unsigned char *replies);
+
+gf_boolean_t
+afr_is_private_directory(afr_private_t *priv, uuid_t pargfid, const char *name,
+ pid_t pid);
#endif /* __AFR_H__ */
diff --git a/xlators/cluster/afr/src/pump.c b/xlators/cluster/afr/src/pump.c
deleted file mode 100644
index ff8f5866f10..00000000000
--- a/xlators/cluster/afr/src/pump.c
+++ /dev/null
@@ -1,2477 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#include <unistd.h>
-#include <sys/time.h>
-#include <stdlib.h>
-#include <fnmatch.h>
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "afr-common.c"
-#include "defaults.c"
-#include "glusterfs.h"
-#include "pump.h"
-
-
-static int
-afr_set_dict_gfid (dict_t *dict, uuid_t gfid)
-{
- int ret = 0;
- uuid_t *pgfid = NULL;
-
- GF_ASSERT (gfid);
-
- pgfid = GF_CALLOC (1, sizeof (uuid_t), gf_common_mt_char);
- if (!pgfid) {
- ret = -1;
- goto out;
- }
-
- gf_uuid_copy (*pgfid, gfid);
-
- ret = dict_set_dynptr (dict, "gfid-req", pgfid, sizeof (uuid_t));
- if (ret)
- gf_log (THIS->name, GF_LOG_ERROR, "gfid set failed");
-
-out:
- if (ret && pgfid)
- GF_FREE (pgfid);
- return ret;
-}
-
-static int
-afr_set_root_gfid (dict_t *dict)
-{
- uuid_t gfid;
- int ret = 0;
-
- memset (gfid, 0, 16);
- gfid[15] = 1;
-
- ret = afr_set_dict_gfid (dict, gfid);
-
- return ret;
-}
-
-static int
-afr_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name)
-{
- int ret = -1;
- uuid_t pargfid = {0};
-
- if (!child)
- goto out;
-
- if (!gf_uuid_is_null (parent->inode->gfid))
- gf_uuid_copy (pargfid, parent->inode->gfid);
- else if (!gf_uuid_is_null (parent->gfid))
- gf_uuid_copy (pargfid, parent->gfid);
-
- if (gf_uuid_is_null (pargfid))
- goto out;
-
- if (strcmp (parent->path, "/") == 0)
- ret = gf_asprintf ((char **)&child->path, "/%s", name);
- else
- ret = gf_asprintf ((char **)&child->path, "%s/%s", parent->path,
- name);
-
- if (-1 == ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "asprintf failed while setting child path");
- }
-
- child->name = strrchr (child->path, '/');
- if (child->name)
- child->name++;
-
- child->parent = inode_ref (parent->inode);
- child->inode = inode_new (parent->inode->table);
- gf_uuid_copy (child->pargfid, pargfid);
-
- if (!child->inode) {
- ret = -1;
- goto out;
- }
-
- ret = 0;
-out:
- if ((ret == -1) && child)
- loc_wipe (child);
-
- return ret;
-}
-
-static void
-afr_build_root_loc (xlator_t *this, loc_t *loc)
-{
- afr_private_t *priv = NULL;
-
- priv = this->private;
- loc->path = gf_strdup ("/");
- loc->name = "";
- loc->inode = inode_ref (priv->root_inode);
- gf_uuid_copy (loc->gfid, loc->inode->gfid);
-}
-
-static void
-afr_update_loc_gfids (loc_t *loc, struct iatt *buf, struct iatt *postparent)
-{
- GF_ASSERT (loc);
- GF_ASSERT (buf);
-
- gf_uuid_copy (loc->gfid, buf->ia_gfid);
- if (postparent)
- gf_uuid_copy (loc->pargfid, postparent->ia_gfid);
-}
-
-static uint64_t pump_pid = 0;
-static inline void
-pump_fill_loc_info (loc_t *loc, struct iatt *iatt, struct iatt *parent)
-{
- afr_update_loc_gfids (loc, iatt, parent);
- gf_uuid_copy (loc->inode->gfid, iatt->ia_gfid);
-}
-
-static int
-pump_mark_start_pending (xlator_t *this)
-{
- afr_private_t *priv = NULL;
- pump_private_t *pump_priv = NULL;
-
- priv = this->private;
- pump_priv = priv->pump_private;
-
- pump_priv->pump_start_pending = 1;
-
- return 0;
-}
-
-static int
-is_pump_start_pending (xlator_t *this)
-{
- afr_private_t *priv = NULL;
- pump_private_t *pump_priv = NULL;
-
- priv = this->private;
- pump_priv = priv->pump_private;
-
- return (pump_priv->pump_start_pending);
-}
-
-static int
-pump_remove_start_pending (xlator_t *this)
-{
- afr_private_t *priv = NULL;
- pump_private_t *pump_priv = NULL;
-
- priv = this->private;
- pump_priv = priv->pump_private;
-
- pump_priv->pump_start_pending = 0;
-
- return 0;
-}
-
-static pump_state_t
-pump_get_state ()
-{
- xlator_t *this = NULL;
- afr_private_t *priv = NULL;
- pump_private_t *pump_priv = NULL;
-
- pump_state_t ret;
-
- this = THIS;
- priv = this->private;
- pump_priv = priv->pump_private;
-
- LOCK (&pump_priv->pump_state_lock);
- {
- ret = pump_priv->pump_state;
- }
- UNLOCK (&pump_priv->pump_state_lock);
-
- return ret;
-}
-
-int
-pump_change_state (xlator_t *this, pump_state_t state)
-{
- afr_private_t *priv = NULL;
- pump_private_t *pump_priv = NULL;
-
- pump_state_t state_old;
- pump_state_t state_new;
-
-
- priv = this->private;
- pump_priv = priv->pump_private;
-
- GF_ASSERT (pump_priv);
-
- LOCK (&pump_priv->pump_state_lock);
- {
- state_old = pump_priv->pump_state;
- state_new = state;
-
- pump_priv->pump_state = state;
-
- }
- UNLOCK (&pump_priv->pump_state_lock);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "Pump changing state from %d to %d",
- state_old,
- state_new);
-
- return 0;
-}
-
-static int
-pump_set_resume_path (xlator_t *this, const char *path)
-{
- int ret = 0;
-
- afr_private_t *priv = NULL;
- pump_private_t *pump_priv = NULL;
-
- priv = this->private;
- pump_priv = priv->pump_private;
-
- GF_ASSERT (pump_priv);
-
- LOCK (&pump_priv->resume_path_lock);
- {
- strncpy (pump_priv->resume_path, path, strlen (path) + 1);
- }
- UNLOCK (&pump_priv->resume_path_lock);
-
- return ret;
-}
-
-static int
-pump_save_path (xlator_t *this, const char *path)
-{
- afr_private_t *priv = NULL;
- pump_state_t state;
- dict_t *dict = NULL;
- loc_t loc = {0};
- int dict_ret = 0;
- int ret = -1;
-
- state = pump_get_state ();
- if (state == PUMP_STATE_RESUME)
- return 0;
-
- priv = this->private;
-
- GF_ASSERT (priv->root_inode);
-
- afr_build_root_loc (this, &loc);
-
- dict = dict_new ();
- dict_ret = dict_set_str (dict, PUMP_PATH, (char *)path);
- if (dict_ret)
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to set the key %s", path, PUMP_PATH);
-
- ret = syncop_setxattr (PUMP_SOURCE_CHILD (this), &loc, dict, 0, NULL,
- NULL);
-
- if (ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "setxattr failed - could not save path=%s", path);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "setxattr succeeded - saved path=%s", path);
- }
-
- dict_unref (dict);
-
- loc_wipe (&loc);
- return 0;
-}
-
-static int
-pump_check_and_update_status (xlator_t *this)
-{
- pump_state_t state;
- int ret = -1;
-
- state = pump_get_state ();
-
- switch (state) {
-
- case PUMP_STATE_RESUME:
- case PUMP_STATE_RUNNING:
- {
- ret = 0;
- break;
- }
- case PUMP_STATE_PAUSE:
- {
- ret = -1;
- break;
- }
- case PUMP_STATE_ABORT:
- {
- pump_save_path (this, "/");
- ret = -1;
- break;
- }
- default:
- {
- gf_log (this->name, GF_LOG_DEBUG,
- "Unknown pump state");
- ret = -1;
- break;
- }
-
- }
-
- return ret;
-}
-
-static const char *
-pump_get_resume_path (xlator_t *this)
-{
- afr_private_t *priv = NULL;
- pump_private_t *pump_priv = NULL;
-
- const char *resume_path = NULL;
-
- priv = this->private;
- pump_priv = priv->pump_private;
-
- resume_path = pump_priv->resume_path;
-
- return resume_path;
-}
-
-static int
-pump_update_resume_state (xlator_t *this, const char *path)
-{
- pump_state_t state;
- const char *resume_path = NULL;
-
- state = pump_get_state ();
-
- if (state == PUMP_STATE_RESUME) {
- resume_path = pump_get_resume_path (this);
- if (strcmp (resume_path, "/") == 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Reached the resume path (/). Proceeding to change state"
- " to running");
- pump_change_state (this, PUMP_STATE_RUNNING);
- } else if (strcmp (resume_path, path) == 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Reached the resume path. Proceeding to change state"
- " to running");
- pump_change_state (this, PUMP_STATE_RUNNING);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "Not yet hit the resume path:res-path=%s,path=%s",
- resume_path, path);
- }
- }
-
- return 0;
-}
-
-static gf_boolean_t
-is_pump_traversal_allowed (xlator_t *this, const char *path)
-{
- pump_state_t state;
- const char *resume_path = NULL;
- gf_boolean_t ret = _gf_true;
-
- state = pump_get_state ();
-
- if (state == PUMP_STATE_RESUME) {
- resume_path = pump_get_resume_path (this);
- if (strstr (resume_path, path)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "On the right path to resumption path");
- ret = _gf_true;
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "Not the right path to resuming=> ignoring traverse");
- ret = _gf_false;
- }
- }
-
- return ret;
-}
-
-static int
-pump_save_file_stats (xlator_t *this, const char *path)
-{
- afr_private_t *priv = NULL;
- pump_private_t *pump_priv = NULL;
-
- priv = this->private;
- pump_priv = priv->pump_private;
-
- LOCK (&pump_priv->resume_path_lock);
- {
- pump_priv->number_files_pumped++;
-
- strncpy (pump_priv->current_file, path,
- PATH_MAX);
- }
- UNLOCK (&pump_priv->resume_path_lock);
-
- return 0;
-}
-
-static int
-gf_pump_traverse_directory (loc_t *loc)
-{
- xlator_t *this = NULL;
- fd_t *fd = NULL;
- off_t offset = 0;
- loc_t entry_loc = {0};
- gf_dirent_t *entry = NULL;
- gf_dirent_t *tmp = NULL;
- gf_dirent_t entries;
- struct iatt iatt = {0};
- struct iatt parent = {0};
- dict_t *xattr_rsp = NULL;
- int ret = 0;
- gf_boolean_t is_directory_empty = _gf_true;
- gf_boolean_t free_entries = _gf_false;
-
- INIT_LIST_HEAD (&entries.list);
- this = THIS;
-
- GF_ASSERT (loc->inode);
-
- fd = fd_create (loc->inode, pump_pid);
- if (!fd) {
- gf_log (this->name, GF_LOG_ERROR,
- "Failed to create fd for %s", loc->path);
- goto out;
- }
-
- ret = syncop_opendir (this, loc, fd, NULL, NULL);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "opendir failed on %s", loc->path);
- goto out;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "pump opendir on %s returned=%d",
- loc->path, ret);
-
- while (syncop_readdirp (this, fd, 131072, offset, &entries, NULL,
- NULL)) {
- free_entries = _gf_true;
-
- if (list_empty (&entries.list)) {
- gf_log (this->name, GF_LOG_TRACE,
- "no more entries in directory");
- goto out;
- }
-
- list_for_each_entry_safe (entry, tmp, &entries.list, list) {
- gf_log (this->name, GF_LOG_DEBUG,
- "found readdir entry=%s", entry->d_name);
-
- offset = entry->d_off;
- if (gf_uuid_is_null (entry->d_stat.ia_gfid)) {
- gf_log (this->name, GF_LOG_WARNING, "%s/%s: No "
- "gfid present skipping",
- loc->path, entry->d_name);
- continue;
- }
- loc_wipe (&entry_loc);
- ret = afr_build_child_loc (this, &entry_loc, loc,
- entry->d_name);
- if (ret)
- goto out;
-
- if ((strcmp (entry->d_name, ".") == 0) ||
- (strcmp (entry->d_name, "..") == 0))
- continue;
-
- is_directory_empty = _gf_false;
- gf_log (this->name, GF_LOG_DEBUG,
- "lookup %s => %"PRId64,
- entry_loc.path,
- iatt.ia_ino);
-
- ret = syncop_lookup (this, &entry_loc, &iatt, &parent,
- NULL, &xattr_rsp);
-
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: lookup failed", entry_loc.path);
- continue;
- }
-
- ret = afr_selfheal_name (this, loc->gfid, entry->d_name,
- NULL);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: name self-heal failed (%s/%s)",
- entry_loc.path, uuid_utoa (loc->gfid),
- entry->d_name);
- continue;
- }
-
- ret = afr_selfheal (this, iatt.ia_gfid);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: self-heal failed (%s)",
- entry_loc.path, uuid_utoa (iatt.ia_gfid));
- continue;
- }
-
- pump_fill_loc_info (&entry_loc, &iatt, &parent);
-
- pump_update_resume_state (this, entry_loc.path);
-
- pump_save_path (this, entry_loc.path);
- pump_save_file_stats (this, entry_loc.path);
-
- ret = pump_check_and_update_status (this);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Pump beginning to exit out");
- goto out;
- }
-
- if (IA_ISDIR (iatt.ia_type)) {
- if (is_pump_traversal_allowed (this, entry_loc.path)) {
- gf_log (this->name, GF_LOG_TRACE,
- "entering dir=%s", entry->d_name);
- gf_pump_traverse_directory (&entry_loc);
- }
- }
- }
-
- gf_dirent_free (&entries);
- free_entries = _gf_false;
- gf_log (this->name, GF_LOG_TRACE, "offset incremented to %d",
- (int32_t ) offset);
-
- }
-
- ret = syncop_close (fd);
- if (ret < 0)
- gf_log (this->name, GF_LOG_DEBUG, "closing the fd failed");
-
- if (is_directory_empty && (strcmp (loc->path, "/") == 0)) {
- pump_change_state (this, PUMP_STATE_RUNNING);
- gf_log (this->name, GF_LOG_INFO, "Empty source brick. "
- "Nothing to be done.");
- }
-
-out:
- if (entry_loc.path)
- loc_wipe (&entry_loc);
- if (free_entries)
- gf_dirent_free (&entries);
- return 0;
-}
-
-static int
-pump_update_resume_path (xlator_t *this)
-{
- const char *resume_path = NULL;
-
- resume_path = pump_get_resume_path (this);
-
- if (resume_path) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Found a path to resume from: %s",
- resume_path);
-
- }else {
- gf_log (this->name, GF_LOG_DEBUG,
- "Did not find a path=> setting to '/'");
- pump_set_resume_path (this, "/");
- }
-
- pump_change_state (this, PUMP_STATE_RESUME);
-
- return 0;
-}
-
-static int32_t
-pump_xattr_cleaner (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- loc_t loc = {0};
- int i = 0;
- int ret = 0;
- int source = 0;
- int sink = 1;
-
- priv = this->private;
-
- afr_build_root_loc (this, &loc);
-
- ret = syncop_removexattr (priv->children[source], &loc,
- PUMP_PATH, 0, NULL);
-
- ret = syncop_removexattr (priv->children[sink], &loc,
- PUMP_SINK_COMPLETE, 0, NULL);
-
- for (i = 0; i < priv->child_count; i++) {
- ret = syncop_removexattr (priv->children[i], &loc,
- PUMP_SOURCE_COMPLETE, 0, NULL);
- if (ret) {
- gf_log (this->name, GF_LOG_DEBUG, "removexattr "
- "failed with %s", strerror (-ret));
- }
- }
-
- loc_wipe (&loc);
- return pump_command_reply (frame, this);
-}
-
-static int
-pump_complete_migration (xlator_t *this)
-{
- afr_private_t *priv = NULL;
- pump_private_t *pump_priv = NULL;
- dict_t *dict = NULL;
- pump_state_t state;
- loc_t loc = {0};
- int dict_ret = 0;
- int ret = -1;
-
- priv = this->private;
- pump_priv = priv->pump_private;
-
- GF_ASSERT (priv->root_inode);
-
- afr_build_root_loc (this, &loc);
-
- dict = dict_new ();
-
- state = pump_get_state ();
- if (state == PUMP_STATE_RUNNING) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Pump finished pumping");
-
- pump_priv->pump_finished = _gf_true;
-
- dict_ret = dict_set_str (dict, PUMP_SOURCE_COMPLETE, "jargon");
- if (dict_ret)
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to set the key %s",
- loc.path, PUMP_SOURCE_COMPLETE);
-
- ret = syncop_setxattr (PUMP_SOURCE_CHILD (this), &loc, dict, 0,
- NULL, NULL);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "setxattr failed - while notifying source complete");
- }
- dict_ret = dict_set_str (dict, PUMP_SINK_COMPLETE, "jargon");
- if (dict_ret)
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to set the key %s",
- loc.path, PUMP_SINK_COMPLETE);
-
- ret = syncop_setxattr (PUMP_SINK_CHILD (this), &loc, dict, 0,
- NULL, NULL);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "setxattr failed - while notifying sink complete");
- }
-
- pump_save_path (this, "/");
-
- } else if (state == PUMP_STATE_ABORT) {
- gf_log (this->name, GF_LOG_DEBUG, "Starting cleanup "
- "of pump internal xattrs");
- call_resume (pump_priv->cleaner);
- }
-
- loc_wipe (&loc);
- return 0;
-}
-
-static int
-pump_lookup_sink (loc_t *loc)
-{
- xlator_t *this = NULL;
- struct iatt iatt, parent;
- dict_t *xattr_rsp;
- dict_t *xattr_req = NULL;
- int ret = 0;
-
- this = THIS;
-
- xattr_req = dict_new ();
-
- ret = afr_set_root_gfid (xattr_req);
- if (ret)
- goto out;
-
- ret = syncop_lookup (PUMP_SINK_CHILD (this), loc, &iatt, &parent,
- xattr_req, &xattr_rsp);
-
- if (ret) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Lookup on sink child failed");
- ret = -1;
- goto out;
- }
-
-out:
- if (xattr_req)
- dict_unref (xattr_req);
-
- return ret;
-}
-
-static int
-pump_task (void *data)
-{
- xlator_t *this = NULL;
- afr_private_t *priv = NULL;
-
-
- loc_t loc = {0};
- struct iatt iatt, parent;
- dict_t *xattr_rsp = NULL;
- dict_t *xattr_req = NULL;
-
- int ret = -1;
-
- this = THIS;
- priv = this->private;
-
- GF_ASSERT (priv->root_inode);
-
- afr_build_root_loc (this, &loc);
- xattr_req = dict_new ();
- if (!xattr_req) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Out of memory");
- ret = -1;
- goto out;
- }
-
- afr_set_root_gfid (xattr_req);
- ret = syncop_lookup (this, &loc, &iatt, &parent,
- xattr_req, &xattr_rsp);
-
- gf_log (this->name, GF_LOG_TRACE,
- "lookup: path=%s gfid=%s",
- loc.path, uuid_utoa (loc.inode->gfid));
-
- ret = pump_check_and_update_status (this);
- if (ret < 0) {
- goto out;
- }
-
- pump_update_resume_path (this);
-
- afr_set_root_gfid (xattr_req);
- ret = pump_lookup_sink (&loc);
- if (ret) {
- pump_update_resume_path (this);
- goto out;
- }
-
- gf_pump_traverse_directory (&loc);
-
- pump_complete_migration (this);
-out:
- if (xattr_req)
- dict_unref (xattr_req);
-
- loc_wipe (&loc);
- return 0;
-}
-
-
-static int
-pump_task_completion (int ret, call_frame_t *sync_frame, void *data)
-{
- xlator_t *this = NULL;
- afr_private_t *priv = NULL;
-
- this = THIS;
-
- priv = this->private;
-
- inode_unref (priv->root_inode);
- STACK_DESTROY (sync_frame->root);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "Pump xlator exiting");
- return 0;
-}
-
-int
-pump_start (call_frame_t *pump_frame, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- pump_private_t *pump_priv = NULL;
-
- int ret = -1;
-
- priv = this->private;
- pump_priv = priv->pump_private;
-
- afr_set_lk_owner (pump_frame, this, pump_frame->root);
- pump_pid = (uint64_t) (unsigned long)pump_frame->root;
-
- ret = synctask_new (pump_priv->env, pump_task,
- pump_task_completion,
- pump_frame, NULL);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "starting pump failed");
- pump_change_state (this, PUMP_STATE_ABORT);
- goto out;
- }
-
- gf_log (this->name, GF_LOG_DEBUG,
- "setting pump as started lk_owner: %s %"PRIu64,
- lkowner_utoa (&pump_frame->root->lk_owner), pump_pid);
-
- priv->use_afr_in_pump = 1;
-out:
- return ret;
-}
-
-static int
-pump_start_synctask (xlator_t *this)
-{
- call_frame_t *frame = NULL;
- int ret = 0;
-
- frame = create_frame (this, this->ctx->pool);
- if (!frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- ret = -1;
- goto out;
- }
-
- pump_change_state (this, PUMP_STATE_RUNNING);
-
- ret = pump_start (frame, this);
-
-out:
- return ret;
-}
-
-int32_t
-pump_cmd_start_setxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata)
-
-{
- call_frame_t *prev = NULL;
- afr_local_t *local = NULL;
- int ret = 0;
-
- local = frame->local;
-
- if (op_ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "Could not initiate destination "
- "brick connect");
- ret = op_ret;
- goto out;
- }
-
- gf_log (this->name, GF_LOG_DEBUG,
- "Successfully initiated destination "
- "brick connect");
-
- pump_mark_start_pending (this);
-
- /* send the PARENT_UP as pump is ready now */
- prev = cookie;
- if (prev && prev->this)
- prev->this->notify (prev->this, GF_EVENT_PARENT_UP, this);
-
-out:
- local->op_ret = ret;
- pump_command_reply (frame, this);
-
- return 0;
-}
-
-static int
-pump_initiate_sink_connect (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- dict_t *dict = NULL;
- data_t *data = NULL;
- char *clnt_cmd = NULL;
- loc_t loc = {0};
-
- int ret = 0;
-
- priv = this->private;
- local = frame->local;
-
- GF_ASSERT (priv->root_inode);
-
- afr_build_root_loc (this, &loc);
-
- data = data_ref (dict_get (local->dict, RB_PUMP_CMD_START));
- if (!data) {
- ret = -1;
- gf_log (this->name, GF_LOG_ERROR,
- "Could not get destination brick value");
- goto out;
- }
-
- dict = dict_new ();
- if (!dict) {
- ret = -1;
- goto out;
- }
-
- clnt_cmd = GF_CALLOC (1, data->len+1, gf_common_mt_char);
- if (!clnt_cmd) {
- ret = -1;
- goto out;
- }
-
- memcpy (clnt_cmd, data->data, data->len);
- clnt_cmd[data->len] = '\0';
- gf_log (this->name, GF_LOG_DEBUG, "Got destination brick %s\n",
- clnt_cmd);
-
- ret = dict_set_dynstr (dict, CLIENT_CMD_CONNECT, clnt_cmd);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "Could not inititiate destination brick "
- "connect");
- goto out;
- }
-
- STACK_WIND (frame,
- pump_cmd_start_setxattr_cbk,
- PUMP_SINK_CHILD(this),
- PUMP_SINK_CHILD(this)->fops->setxattr,
- &loc,
- dict,
- 0, NULL);
-
- ret = 0;
-
-out:
- if (dict)
- dict_unref (dict);
-
- if (data)
- data_unref (data);
-
- if (ret && clnt_cmd)
- GF_FREE (clnt_cmd);
-
- loc_wipe (&loc);
- return ret;
-}
-
-static int
-is_pump_aborted (xlator_t *this)
-{
- pump_state_t state;
-
- state = pump_get_state ();
-
- return ((state == PUMP_STATE_ABORT));
-}
-
-int32_t
-pump_cmd_start_getxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict, dict_t *xdata)
-{
- afr_local_t *local = NULL;
- char *path = NULL;
-
- pump_state_t state;
- int ret = 0;
- int need_unwind = 0;
- int dict_ret = -1;
-
- local = frame->local;
-
- if (op_ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "getxattr failed - changing pump "
- "state to RUNNING with '/'");
- path = "/";
- ret = op_ret;
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "getxattr succeeded");
-
- dict_ret = dict_get_str (dict, PUMP_PATH, &path);
- if (dict_ret < 0)
- path = "/";
- }
-
- state = pump_get_state ();
- if ((state == PUMP_STATE_RUNNING) ||
- (state == PUMP_STATE_RESUME)) {
- gf_log (this->name, GF_LOG_ERROR,
- "Pump is already started");
- ret = -1;
- goto out;
- }
-
- pump_set_resume_path (this, path);
-
- if (is_pump_aborted (this))
- /* We're re-starting pump afresh */
- ret = pump_initiate_sink_connect (frame, this);
- else {
- /* We're re-starting pump from a previous
- pause */
- gf_log (this->name, GF_LOG_DEBUG,
- "about to start synctask");
- ret = pump_start_synctask (this);
- need_unwind = 1;
- }
-
-out:
- if ((ret < 0) || (need_unwind == 1)) {
- local->op_ret = ret;
- pump_command_reply (frame, this);
- }
- return 0;
-}
-
-int
-pump_execute_status (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- pump_private_t *pump_priv = NULL;
-
- uint64_t number_files = 0;
-
- char filename[PATH_MAX];
- char summary[PATH_MAX+256];
- char *dict_str = NULL;
-
- int32_t op_ret = 0;
- int32_t op_errno = 0;
-
- dict_t *dict = NULL;
- int ret = -1;
-
- priv = this->private;
- pump_priv = priv->pump_private;
-
- LOCK (&pump_priv->resume_path_lock);
- {
- number_files = pump_priv->number_files_pumped;
- strncpy (filename, pump_priv->current_file, PATH_MAX);
- }
- UNLOCK (&pump_priv->resume_path_lock);
-
- dict_str = GF_CALLOC (1, PATH_MAX + 256, gf_afr_mt_char);
- if (!dict_str) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- op_ret = -1;
- op_errno = ENOMEM;
- goto out;
- }
-
- if (pump_priv->pump_finished) {
- snprintf (summary, PATH_MAX+256,
- "no_of_files=%"PRIu64, number_files);
- } else {
- snprintf (summary, PATH_MAX+256,
- "no_of_files=%"PRIu64":current_file=%s",
- number_files, filename);
- }
- snprintf (dict_str, PATH_MAX+256, "status=%d:%s",
- (pump_priv->pump_finished)?1:0, summary);
-
- dict = dict_new ();
-
- ret = dict_set_dynstr (dict, RB_PUMP_CMD_STATUS, dict_str);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "dict_set_dynstr returned negative value");
- } else {
- dict_str = NULL;
- }
-
- op_ret = 0;
-
-out:
-
- AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, NULL);
-
- if (dict)
- dict_unref (dict);
-
- GF_FREE (dict_str);
-
- return 0;
-}
-
-int
-pump_execute_pause (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- pump_change_state (this, PUMP_STATE_PAUSE);
-
- local->op_ret = 0;
- pump_command_reply (frame, this);
-
- return 0;
-}
-
-int
-pump_execute_start (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
-
- int ret = 0;
- loc_t loc = {0};
-
- priv = this->private;
- local = frame->local;
-
- if (!priv->root_inode) {
- gf_log (this->name, GF_LOG_ERROR,
- "Pump xlator cannot be started without an initial "
- "lookup");
- ret = -1;
- goto out;
- }
-
- GF_ASSERT (priv->root_inode);
-
- afr_build_root_loc (this, &loc);
-
- STACK_WIND (frame,
- pump_cmd_start_getxattr_cbk,
- PUMP_SOURCE_CHILD(this),
- PUMP_SOURCE_CHILD(this)->fops->getxattr,
- &loc,
- PUMP_PATH, NULL);
-
- ret = 0;
-
-out:
- if (ret < 0) {
- local->op_ret = ret;
- pump_command_reply (frame, this);
- }
-
- loc_wipe (&loc);
- return 0;
-}
-
-static int
-pump_cleanup_helper (void *data) {
- call_frame_t *frame = data;
-
- pump_xattr_cleaner (frame, 0, frame->this, 0, 0, NULL);
-
- return 0;
-}
-
-static int
-pump_cleanup_done (int ret, call_frame_t *sync_frame, void *data)
-{
- STACK_DESTROY (sync_frame->root);
-
- return 0;
-}
-
-int
-pump_execute_commit (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- pump_private_t *pump_priv = NULL;
- afr_local_t *local = NULL;
- call_frame_t *sync_frame = NULL;
- int ret = 0;
-
- priv = this->private;
- pump_priv = priv->pump_private;
- local = frame->local;
-
- local->op_ret = 0;
- if (pump_priv->pump_finished) {
- pump_change_state (this, PUMP_STATE_COMMIT);
- sync_frame = create_frame (this, this->ctx->pool);
- ret = synctask_new (pump_priv->env, pump_cleanup_helper,
- pump_cleanup_done, sync_frame, frame);
- if (ret) {
- gf_log (this->name, GF_LOG_DEBUG, "Couldn't create "
- "synctask for cleaning up xattrs.");
- }
-
- } else {
- gf_log (this->name, GF_LOG_ERROR, "Commit can't proceed. "
- "Migration in progress");
- local->op_ret = -1;
- local->op_errno = EINPROGRESS;
- pump_command_reply (frame, this);
- }
-
- return 0;
-}
-int
-pump_execute_abort (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- pump_private_t *pump_priv = NULL;
- afr_local_t *local = NULL;
- call_frame_t *sync_frame = NULL;
- int ret = 0;
-
- priv = this->private;
- pump_priv = priv->pump_private;
- local = frame->local;
-
- pump_change_state (this, PUMP_STATE_ABORT);
-
- LOCK (&pump_priv->resume_path_lock);
- {
- pump_priv->number_files_pumped = 0;
- pump_priv->current_file[0] = '\0';
- }
- UNLOCK (&pump_priv->resume_path_lock);
-
- local->op_ret = 0;
- if (pump_priv->pump_finished) {
- sync_frame = create_frame (this, this->ctx->pool);
- ret = synctask_new (pump_priv->env, pump_cleanup_helper,
- pump_cleanup_done, sync_frame, frame);
- if (ret) {
- gf_log (this->name, GF_LOG_DEBUG, "Couldn't create "
- "synctask for cleaning up xattrs.");
- }
-
- } else {
- pump_priv->cleaner = fop_setxattr_cbk_stub (frame,
- pump_xattr_cleaner,
- 0, 0, NULL);
- }
-
- return 0;
-}
-
-gf_boolean_t
-pump_command_status (xlator_t *this, dict_t *dict)
-{
- char *cmd = NULL;
- int dict_ret = -1;
- int ret = _gf_true;
-
- dict_ret = dict_get_str (dict, RB_PUMP_CMD_STATUS, &cmd);
- if (dict_ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Not a pump status command");
- ret = _gf_false;
- goto out;
- }
-
- gf_log (this->name, GF_LOG_DEBUG,
- "Hit a pump command - status");
- ret = _gf_true;
-
-out:
- return ret;
-
-}
-
-gf_boolean_t
-pump_command_pause (xlator_t *this, dict_t *dict)
-{
- char *cmd = NULL;
- int dict_ret = -1;
- int ret = _gf_true;
-
- dict_ret = dict_get_str (dict, RB_PUMP_CMD_PAUSE, &cmd);
- if (dict_ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Not a pump pause command");
- ret = _gf_false;
- goto out;
- }
-
- gf_log (this->name, GF_LOG_DEBUG,
- "Hit a pump command - pause");
- ret = _gf_true;
-
-out:
- return ret;
-
-}
-
-gf_boolean_t
-pump_command_commit (xlator_t *this, dict_t *dict)
-{
- char *cmd = NULL;
- int dict_ret = -1;
- int ret = _gf_true;
-
- dict_ret = dict_get_str (dict, RB_PUMP_CMD_COMMIT, &cmd);
- if (dict_ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Not a pump commit command");
- ret = _gf_false;
- goto out;
- }
-
- gf_log (this->name, GF_LOG_DEBUG,
- "Hit a pump command - commit");
- ret = _gf_true;
-
-out:
- return ret;
-
-}
-
-gf_boolean_t
-pump_command_abort (xlator_t *this, dict_t *dict)
-{
- char *cmd = NULL;
- int dict_ret = -1;
- int ret = _gf_true;
-
- dict_ret = dict_get_str (dict, RB_PUMP_CMD_ABORT, &cmd);
- if (dict_ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Not a pump abort command");
- ret = _gf_false;
- goto out;
- }
-
- gf_log (this->name, GF_LOG_DEBUG,
- "Hit a pump command - abort");
- ret = _gf_true;
-
-out:
- return ret;
-
-}
-
-gf_boolean_t
-pump_command_start (xlator_t *this, dict_t *dict)
-{
- char *cmd = NULL;
- int dict_ret = -1;
- int ret = _gf_true;
-
- dict_ret = dict_get_str (dict, RB_PUMP_CMD_START, &cmd);
- if (dict_ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Not a pump start command");
- ret = _gf_false;
- goto out;
- }
-
- gf_log (this->name, GF_LOG_DEBUG,
- "Hit a pump command - start");
- ret = _gf_true;
-
-out:
- return ret;
-
-}
-
-int
-pump_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- int op_errno = 0;
- int ret = 0;
-
- priv = this->private;
-
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame, default_getxattr_cbk,
- FIRST_CHILD (this),
- (FIRST_CHILD (this))->fops->getxattr,
- loc, name, xdata);
- return 0;
- }
-
- if (name) {
- if (!strncmp (name, AFR_XATTR_PREFIX,
- strlen (AFR_XATTR_PREFIX))) {
-
- op_errno = ENODATA;
- ret = -1;
- goto out;
- }
-
- if (!strcmp (name, RB_PUMP_CMD_STATUS)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Hit pump command - status");
- pump_execute_status (frame, this);
- goto out;
- }
- }
-
- afr_getxattr (frame, this, loc, name, xdata);
-
-out:
- if (ret < 0)
- AFR_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL, NULL);
- return 0;
-}
-
-int
-pump_command_reply (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- if (local->op_ret < 0)
- gf_log (this->name, GF_LOG_INFO,
- "Command failed");
- else
- gf_log (this->name, GF_LOG_INFO,
- "Command succeeded");
-
- AFR_STACK_UNWIND (setxattr,
- frame,
- local->op_ret,
- local->op_errno, NULL);
-
- return 0;
-}
-
-int
-pump_parse_command (call_frame_t *frame, xlator_t *this, dict_t *dict,
- int *op_errno_p)
-{
- afr_local_t *local = NULL;
- int ret = -1;
- int op_errno = 0;
-
- if (pump_command_start (this, dict)) {
- local = AFR_FRAME_INIT (frame, op_errno);
- if (!local)
- goto out;
- local->dict = dict_ref (dict);
- ret = pump_execute_start (frame, this);
-
- } else if (pump_command_pause (this, dict)) {
- local = AFR_FRAME_INIT (frame, op_errno);
- if (!local)
- goto out;
- local->dict = dict_ref (dict);
- ret = pump_execute_pause (frame, this);
-
- } else if (pump_command_abort (this, dict)) {
- local = AFR_FRAME_INIT (frame, op_errno);
- if (!local)
- goto out;
- local->dict = dict_ref (dict);
- ret = pump_execute_abort (frame, this);
-
- } else if (pump_command_commit (this, dict)) {
- local = AFR_FRAME_INIT (frame, op_errno);
- if (!local)
- goto out;
- local->dict = dict_ref (dict);
- ret = pump_execute_commit (frame, this);
- }
-out:
- if (op_errno_p)
- *op_errno_p = op_errno;
- return ret;
-}
-
-int
-pump_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
- int32_t flags, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- int ret = -1;
- int op_errno = 0;
-
- GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.pump*", dict, op_errno, out);
-
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame, default_setxattr_cbk,
- FIRST_CHILD (this),
- (FIRST_CHILD (this))->fops->setxattr,
- loc, dict, flags, xdata);
- return 0;
- }
-
- ret = pump_parse_command (frame, this, dict, &op_errno);
- if (ret >= 0)
- goto out;
-
- afr_setxattr (frame, this, loc, dict, flags, xdata);
-
- ret = 0;
-out:
- if (ret < 0) {
- AFR_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
- }
-
- return 0;
-}
-
-/* Defaults */
-static int32_t
-pump_lookup (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *xattr_req)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame,
- default_lookup_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup,
- loc,
- xattr_req);
- return 0;
- }
-
- afr_lookup (frame, this, loc, xattr_req);
- return 0;
-}
-
-
-static int32_t
-pump_truncate (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- off_t offset, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame,
- default_truncate_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->truncate,
- loc,
- offset, xdata);
- return 0;
- }
-
- afr_truncate (frame, this, loc, offset, xdata);
- return 0;
-}
-
-
-static int32_t
-pump_ftruncate (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- off_t offset, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame,
- default_ftruncate_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate,
- fd,
- offset, xdata);
- return 0;
- }
-
- afr_ftruncate (frame, this, fd, offset, xdata);
- return 0;
-}
-
-
-
-
-int
-pump_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame, default_mknod_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mknod,
- loc, mode, rdev, umask, xdata);
- return 0;
- }
- afr_mknod (frame, this, loc, mode, rdev, umask, xdata);
- return 0;
-
-}
-
-
-
-int
-pump_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame, default_mkdir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mkdir,
- loc, mode, umask, xdata);
- return 0;
- }
- afr_mkdir (frame, this, loc, mode, umask, xdata);
- return 0;
-
-}
-
-
-static int32_t
-pump_unlink (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc, int xflag, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame,
- default_unlink_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink,
- loc, xflag, xdata);
- return 0;
- }
- afr_unlink (frame, this, loc, xflag, xdata);
- return 0;
-
-}
-
-
-static int
-pump_rmdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int flags, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
-
- priv = this->private;
-
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame, default_rmdir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rmdir,
- loc, flags, xdata);
- return 0;
- }
-
- afr_rmdir (frame, this, loc, flags, xdata);
- return 0;
-
-}
-
-
-
-int
-pump_symlink (call_frame_t *frame, xlator_t *this,
- const char *linkpath, loc_t *loc, mode_t umask, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame, default_symlink_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->symlink,
- linkpath, loc, umask, xdata);
- return 0;
- }
- afr_symlink (frame, this, linkpath, loc, umask, xdata);
- return 0;
-
-}
-
-
-static int32_t
-pump_rename (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame,
- default_rename_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rename,
- oldloc, newloc, xdata);
- return 0;
- }
- afr_rename (frame, this, oldloc, newloc, xdata);
- return 0;
-
-}
-
-
-static int32_t
-pump_link (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame,
- default_link_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->link,
- oldloc, newloc, xdata);
- return 0;
- }
- afr_link (frame, this, oldloc, newloc, xdata);
- return 0;
-
-}
-
-
-static int32_t
-pump_create (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, mode_t mode,
- mode_t umask, fd_t *fd, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame, default_create_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create,
- loc, flags, mode, umask, fd, xdata);
- return 0;
- }
- afr_create (frame, this, loc, flags, mode, umask, fd, xdata);
- return 0;
-
-}
-
-
-static int32_t
-pump_open (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flags, fd_t *fd, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame,
- default_open_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open,
- loc, flags, fd, xdata);
- return 0;
- }
- afr_open (frame, this, loc, flags, fd, xdata);
- return 0;
-
-}
-
-
-static int32_t
-pump_writev (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- struct iovec *vector,
- int32_t count,
- off_t off, uint32_t flags,
- struct iobref *iobref, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame,
- default_writev_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->writev,
- fd,
- vector,
- count,
- off, flags,
- iobref, xdata);
- return 0;
- }
-
- afr_writev (frame, this, fd, vector, count, off, flags, iobref, xdata);
- return 0;
-}
-
-
-static int32_t
-pump_flush (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame,
- default_flush_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->flush,
- fd, xdata);
- return 0;
- }
- afr_flush (frame, this, fd, xdata);
- return 0;
-
-}
-
-
-static int32_t
-pump_fsync (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t flags, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame,
- default_fsync_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsync,
- fd,
- flags, xdata);
- return 0;
- }
- afr_fsync (frame, this, fd, flags, xdata);
- return 0;
-
-}
-
-
-static int32_t
-pump_opendir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc, fd_t *fd, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame,
- default_opendir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->opendir,
- loc, fd, xdata);
- return 0;
- }
- afr_opendir (frame, this, loc, fd, xdata);
- return 0;
-
-}
-
-
-static int32_t
-pump_fsyncdir (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t flags, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame,
- default_fsyncdir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsyncdir,
- fd,
- flags, xdata);
- return 0;
- }
- afr_fsyncdir (frame, this, fd, flags, xdata);
- return 0;
-
-}
-
-
-static int32_t
-pump_xattrop (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- gf_xattrop_flags_t flags,
- dict_t *dict, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame,
- default_xattrop_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->xattrop,
- loc,
- flags,
- dict, xdata);
- return 0;
- }
- afr_xattrop (frame, this, loc, flags, dict, xdata);
- return 0;
-
-}
-
-static int32_t
-pump_fxattrop (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- gf_xattrop_flags_t flags,
- dict_t *dict, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame,
- default_fxattrop_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fxattrop,
- fd,
- flags,
- dict, xdata);
- return 0;
- }
- afr_fxattrop (frame, this, fd, flags, dict, xdata);
- return 0;
-
-}
-
-
-static int32_t
-pump_removexattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- int op_errno = -1;
-
- VALIDATE_OR_GOTO (this, out);
-
- GF_IF_NATIVE_XATTR_GOTO ("trusted.glusterfs.pump*",
- name, op_errno, out);
-
- op_errno = 0;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame,
- default_removexattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->removexattr,
- loc,
- name, xdata);
- return 0;
- }
- afr_removexattr (frame, this, loc, name, xdata);
-
- out:
- if (op_errno)
- AFR_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL);
- return 0;
-
-}
-
-
-
-static int32_t
-pump_readdir (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t off, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame,
- default_readdir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readdir,
- fd, size, off, xdata);
- return 0;
- }
- afr_readdir (frame, this, fd, size, off, xdata);
- return 0;
-
-}
-
-
-static int32_t
-pump_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd,
- size_t size, off_t off, dict_t *dict)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame,
- default_readdirp_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readdirp,
- fd, size, off, dict);
- return 0;
- }
- afr_readdirp (frame, this, fd, size, off, dict);
- return 0;
-
-}
-
-
-
-static int32_t
-pump_releasedir (xlator_t *this,
- fd_t *fd)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (priv->use_afr_in_pump)
- afr_releasedir (this, fd);
- return 0;
-
-}
-
-static int32_t
-pump_release (xlator_t *this,
- fd_t *fd)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (priv->use_afr_in_pump)
- afr_release (this, fd);
- return 0;
-
-}
-
-static int32_t
-pump_forget (xlator_t *this, inode_t *inode)
-{
- afr_private_t *priv = NULL;
-
- priv = this->private;
- if (priv->use_afr_in_pump)
- afr_forget (this, inode);
-
- return 0;
-}
-
-static int32_t
-pump_setattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- struct iatt *stbuf,
- int32_t valid, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame,
- default_setattr_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->setattr,
- loc, stbuf, valid, xdata);
- return 0;
- }
- afr_setattr (frame, this, loc, stbuf, valid, xdata);
- return 0;
-
-}
-
-
-static int32_t
-pump_fsetattr (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- struct iatt *stbuf,
- int32_t valid, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame,
- default_fsetattr_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fsetattr,
- fd, stbuf, valid, xdata);
- return 0;
- }
- afr_fsetattr (frame, this, fd, stbuf, valid, xdata);
- return 0;
-
-}
-
-
-/* End of defaults */
-
-
-int32_t
-mem_acct_init (xlator_t *this)
-{
- int ret = -1;
-
- if (!this)
- return ret;
-
- ret = xlator_mem_acct_init (this, gf_afr_mt_end + 1);
-
- if (ret != 0) {
- gf_log(this->name, GF_LOG_ERROR, "Memory accounting init"
- "failed");
- return ret;
- }
-
- return ret;
-}
-
-static int
-is_xlator_pump_sink (xlator_t *child)
-{
- return (child == PUMP_SINK_CHILD(THIS));
-}
-
-static int
-is_xlator_pump_source (xlator_t *child)
-{
- return (child == PUMP_SOURCE_CHILD(THIS));
-}
-
-int32_t
-notify (xlator_t *this, int32_t event,
- void *data, ...)
-{
- int ret = -1;
- xlator_t *child_xl = NULL;
-
- child_xl = (xlator_t *) data;
-
- ret = afr_notify (this, event, data, NULL);
-
- switch (event) {
- case GF_EVENT_CHILD_DOWN:
- if (is_xlator_pump_source (child_xl))
- pump_change_state (this, PUMP_STATE_ABORT);
- break;
-
- case GF_EVENT_CHILD_UP:
- if (is_xlator_pump_sink (child_xl))
- if (is_pump_start_pending (this)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "about to start synctask");
- ret = pump_start_synctask (this);
- if (ret < 0)
- gf_log (this->name, GF_LOG_DEBUG,
- "Could not start pump "
- "synctask");
- else
- pump_remove_start_pending (this);
- }
- }
-
- return ret;
-}
-
-int32_t
-init (xlator_t *this)
-{
- afr_private_t * priv = NULL;
- pump_private_t *pump_priv = NULL;
- int child_count = 0;
- xlator_list_t * trav = NULL;
- int i = 0;
- int ret = -1;
- GF_UNUSED int op_errno = 0;
-
- int source_child = 0;
-
- if (!this->children) {
- gf_log (this->name, GF_LOG_ERROR,
- "pump translator needs a source and sink"
- "subvolumes defined.");
- return -1;
- }
-
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "Volume is dangling.");
- }
-
- priv = GF_CALLOC (1, sizeof (afr_private_t), gf_afr_mt_afr_private_t);
- if (!priv)
- goto out;
-
- LOCK_INIT (&priv->lock);
-
- child_count = xlator_subvolume_count (this);
- if (child_count != 2) {
- gf_log (this->name, GF_LOG_ERROR,
- "There should be exactly 2 children - one source "
- "and one sink");
- return -1;
- }
- priv->child_count = child_count;
-
- priv->read_child = source_child;
- priv->favorite_child = source_child;
- priv->background_self_heal_count = 0;
-
- priv->data_self_heal = "on";
- priv->metadata_self_heal = 1;
- priv->entry_self_heal = 1;
-
- priv->data_self_heal_window_size = 16;
-
- priv->data_change_log = 1;
- priv->metadata_change_log = 1;
- priv->entry_change_log = 1;
- priv->use_afr_in_pump = 1;
- priv->sh_readdir_size = 65536;
-
- /* Locking options */
-
- /* Lock server count infact does not matter. Locks are held
- on all subvolumes, in this case being the source
- and the sink.
- */
-
- priv->child_up = GF_CALLOC (sizeof (unsigned char), child_count,
- gf_afr_mt_char);
- if (!priv->child_up) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- op_errno = ENOMEM;
- goto out;
- }
-
- priv->children = GF_CALLOC (sizeof (xlator_t *), child_count,
- gf_afr_mt_xlator_t);
- if (!priv->children) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- op_errno = ENOMEM;
- goto out;
- }
-
- priv->pending_key = GF_CALLOC (sizeof (*priv->pending_key),
- child_count,
- gf_afr_mt_char);
- if (!priv->pending_key) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- op_errno = ENOMEM;
- goto out;
- }
-
- trav = this->children;
- i = 0;
- while (i < child_count) {
- priv->children[i] = trav->xlator;
-
- ret = gf_asprintf (&priv->pending_key[i], "%s.%s",
- AFR_XATTR_PREFIX,
- trav->xlator->name);
- if (-1 == ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "asprintf failed to set pending key");
- op_errno = ENOMEM;
- goto out;
- }
-
- trav = trav->next;
- i++;
- }
-
- ret = gf_asprintf (&priv->sh_domain, "%s-self-heal", this->name);
- if (-1 == ret) {
- op_errno = ENOMEM;
- goto out;
- }
-
- priv->root_inode = NULL;
-
- priv->last_event = GF_CALLOC (child_count, sizeof (*priv->last_event),
- gf_afr_mt_int32_t);
- if (!priv->last_event) {
- ret = -ENOMEM;
- goto out;
- }
-
- pump_priv = GF_CALLOC (1, sizeof (*pump_priv),
- gf_afr_mt_pump_priv);
- if (!pump_priv) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- op_errno = ENOMEM;
- goto out;
- }
-
- LOCK_INIT (&pump_priv->resume_path_lock);
- LOCK_INIT (&pump_priv->pump_state_lock);
-
- pump_priv->resume_path = GF_CALLOC (1, PATH_MAX,
- gf_afr_mt_char);
- if (!pump_priv->resume_path) {
- gf_log (this->name, GF_LOG_ERROR, "Out of memory");
- ret = -1;
- goto out;
- }
-
- pump_priv->env = this->ctx->env;
- if (!pump_priv->env) {
- gf_log (this->name, GF_LOG_ERROR,
- "Could not create new sync-environment");
- ret = -1;
- goto out;
- }
-
- /* keep more local here as we may need them for self-heal etc */
- this->local_pool = mem_pool_new (afr_local_t, 128);
- if (!this->local_pool) {
- ret = -1;
- gf_log (this->name, GF_LOG_ERROR,
- "failed to create local_t's memory pool");
- goto out;
- }
-
- priv->pump_private = pump_priv;
- pump_priv = NULL;
-
- this->private = priv;
- priv = NULL;
-
- pump_change_state (this, PUMP_STATE_ABORT);
-
- ret = 0;
-out:
-
- if (pump_priv) {
- GF_FREE (pump_priv->resume_path);
- LOCK_DESTROY (&pump_priv->resume_path_lock);
- LOCK_DESTROY (&pump_priv->pump_state_lock);
- GF_FREE (pump_priv);
- }
-
- if (priv) {
- GF_FREE (priv->child_up);
- GF_FREE (priv->children);
- GF_FREE (priv->pending_key);
- GF_FREE (priv->last_event);
- LOCK_DESTROY (&priv->lock);
- GF_FREE (priv);
- }
-
- return ret;
-}
-
-int
-fini (xlator_t *this)
-{
- afr_private_t * priv = NULL;
- pump_private_t *pump_priv = NULL;
-
- priv = this->private;
- this->private = NULL;
- if (!priv)
- goto out;
-
- pump_priv = priv->pump_private;
- if (!pump_priv)
- goto afr_priv;
-
- GF_FREE (pump_priv->resume_path);
- LOCK_DESTROY (&pump_priv->resume_path_lock);
- LOCK_DESTROY (&pump_priv->pump_state_lock);
- GF_FREE (pump_priv);
-afr_priv:
- afr_priv_destroy (priv);
-out:
- return 0;
-}
-
-
-struct xlator_fops fops = {
- .lookup = pump_lookup,
- .open = pump_open,
- .flush = pump_flush,
- .fsync = pump_fsync,
- .fsyncdir = pump_fsyncdir,
- .xattrop = pump_xattrop,
- .fxattrop = pump_fxattrop,
- .getxattr = pump_getxattr,
-
- /* inode write */
- .writev = pump_writev,
- .truncate = pump_truncate,
- .ftruncate = pump_ftruncate,
- .setxattr = pump_setxattr,
- .setattr = pump_setattr,
- .fsetattr = pump_fsetattr,
- .removexattr = pump_removexattr,
-
- /* dir read */
- .opendir = pump_opendir,
- .readdir = pump_readdir,
- .readdirp = pump_readdirp,
-
- /* dir write */
- .create = pump_create,
- .mknod = pump_mknod,
- .mkdir = pump_mkdir,
- .unlink = pump_unlink,
- .rmdir = pump_rmdir,
- .link = pump_link,
- .symlink = pump_symlink,
- .rename = pump_rename,
-};
-
-struct xlator_dumpops dumpops = {
- .priv = afr_priv_dump,
-};
-
-
-struct xlator_cbks cbks = {
- .release = pump_release,
- .releasedir = pump_releasedir,
- .forget = pump_forget,
-};
-
-struct volume_options options[] = {
- { .key = {NULL} },
-};
diff --git a/xlators/cluster/afr/src/pump.h b/xlators/cluster/afr/src/pump.h
deleted file mode 100644
index 7d5acd02bf6..00000000000
--- a/xlators/cluster/afr/src/pump.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef __PUMP_H__
-#define __PUMP_H__
-
-#include "syncop.h"
-
-/* FIXME: Needs to be defined in a common file */
-#define CLIENT_CMD_CONNECT "trusted.glusterfs.client-connect"
-#define CLIENT_CMD_DISCONNECT "trusted.glusterfs.client-disconnect"
-
-#define PUMP_SOURCE_COMPLETE "trusted.glusterfs.pump-source-complete"
-#define PUMP_SINK_COMPLETE "trusted.glusterfs.pump-sink-complete"
-
-#define PUMP_PATH "trusted.glusterfs.pump-path"
-
-#define PUMP_SOURCE_CHILD(xl) (xl->children->xlator)
-#define PUMP_SINK_CHILD(xl) (xl->children->next->xlator)
-
-typedef enum {
- PUMP_STATE_RUNNING, /* Pump is running and migrating files */
- PUMP_STATE_RESUME, /* Pump is resuming from a previous pause */
- PUMP_STATE_PAUSE, /* Pump is paused */
- PUMP_STATE_ABORT, /* Pump is aborted */
- PUMP_STATE_COMMIT, /* Pump is committed */
-} pump_state_t;
-
-typedef struct _pump_private {
- struct syncenv *env; /* The env pointer to the pump synctask */
- char *resume_path; /* path to resume from the last pause */
- gf_lock_t resume_path_lock; /* Synchronize resume_path changes */
- gf_lock_t pump_state_lock; /* Synchronize pump_state changes */
- pump_state_t pump_state; /* State of pump */
- char current_file[PATH_MAX]; /* Current file being pumped */
- uint64_t number_files_pumped; /* Number of files pumped */
- gf_boolean_t pump_finished; /* Boolean to indicate pump termination */
- char pump_start_pending; /* Boolean to mark start pending until
- CHILD_UP */
- call_stub_t *cleaner;
-} pump_private_t;
-
-void
-build_root_loc (inode_t *inode, loc_t *loc);
-int pump_start (call_frame_t *frame, xlator_t *this);
-
-gf_boolean_t
-pump_command_start (xlator_t *this, dict_t *dict);
-
-int
-pump_execute_start (call_frame_t *frame, xlator_t *this);
-
-gf_boolean_t
-pump_command_pause (xlator_t *this, dict_t *dict);
-
-int
-pump_execute_pause (call_frame_t *frame, xlator_t *this);
-
-gf_boolean_t
-pump_command_abort (xlator_t *this, dict_t *dict);
-
-int
-pump_execute_abort (call_frame_t *frame, xlator_t *this);
-
-gf_boolean_t
-pump_command_status (xlator_t *this, dict_t *dict);
-
-int
-pump_execute_status (call_frame_t *frame, xlator_t *this);
-
-int
-pump_command_reply (call_frame_t *frame, xlator_t *this);
-
-#endif /* __PUMP_H__ */
diff --git a/xlators/cluster/dht/src/Makefile.am b/xlators/cluster/dht/src/Makefile.am
index a8e1ec0d286..56f1f2ad7c8 100644
--- a/xlators/cluster/dht/src/Makefile.am
+++ b/xlators/cluster/dht/src/Makefile.am
@@ -1,7 +1,4 @@
xlator_LTLIBRARIES = dht.la nufa.la switch.la
-if BUILD_GFDB
- xlator_LTLIBRARIES += tier.la
-endif
AM_CFLAGS = -Wall $(GF_CFLAGS)
@@ -10,36 +7,31 @@ xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster
dht_common_source = dht-layout.c dht-helper.c dht-linkfile.c dht-rebalance.c \
dht-selfheal.c dht-rename.c dht-hashfn.c dht-diskusage.c \
dht-common.c dht-inode-write.c dht-inode-read.c dht-shared.c \
- $(top_builddir)/xlators/lib/src/libxlator.c
+ dht-lock.c $(top_builddir)/xlators/lib/src/libxlator.c
dht_la_SOURCES = $(dht_common_source) dht.c
nufa_la_SOURCES = $(dht_common_source) nufa.c
switch_la_SOURCES = $(dht_common_source) switch.c
-tier_la_SOURCES = $(dht_common_source) tier.c
-dht_la_LDFLAGS = -module -avoid-version
+dht_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
dht_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-nufa_la_LDFLAGS = -module -avoid-version
+nufa_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
nufa_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-switch_la_LDFLAGS = -module -avoid-version
+switch_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
switch_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-
-tier_la_LDFLAGS = -module -avoid-version
-tier_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-
-noinst_HEADERS = dht-common.h dht-mem-types.h dht-messages.h dht-helper.h tier.h\
- $(top_builddir)/xlators/lib/src/libxlator.h
+noinst_HEADERS = dht-common.h dht-mem-types.h dht-messages.h \
+ dht-lock.h $(top_builddir)/xlators/lib/src/libxlator.h
AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
- -I$(top_srcdir)/libglusterfs/src/gfdb \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \
+ -I$(top_srcdir)/rpc/rpc-lib/src \
-I$(top_srcdir)/xlators/lib/src \
-DDATADIR=\"$(localstatedir)\" \
- -DLIBDIR=\"$(libdir)\" \
- -DLIBGFDB_VERSION=\"$(LIBGFDB_VERSION)\"
+ -DLIBDIR=\"$(libdir)\"
CLEANFILES =
@@ -53,13 +45,4 @@ if UNITTEST
CLEANFILES += *.gcda *.gcno *_xunit.xml
noinst_PROGRAMS =
TESTS =
-
-dht_layout_unittest_CPPFLAGS = $(AM_CPPFLAGS)
-dht_layout_unittest_SOURCES = unittest/dht_layout_unittest.c \
- unittest/dht_layout_mock.c \
- dht-layout.c
-dht_layout_unittest_CFLAGS = $(AM_CFLAGS) $(UNITTEST_CFLAGS)
-dht_layout_unittest_LDFLAGS = $(UNITTEST_LDFLAGS)
-noinst_PROGRAMS += dht_layout_unittest
-TESTS += dht_layout_unittest
endif
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index 307265fb56a..8ba0cc4c732 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -8,151 +8,439 @@
cases as published by the Free Software Foundation.
*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
/* TODO: add NS locking */
-#include "glusterfs.h"
-#include "xlator.h"
#include "libxlator.h"
#include "dht-common.h"
-#include "defaults.h"
-#include "byte-order.h"
-#include "glusterfs-acl.h"
-#include "quota-common-utils.h"
+#include "dht-lock.h"
+#include <glusterfs/byte-order.h>
+#include <glusterfs/quota-common-utils.h>
+#include <glusterfs/upcall-utils.h>
+#include "glusterfs/compat-errno.h" // for ENODATA on BSD
+#include <glusterfs/common-utils.h>
#include <sys/time.h>
#include <libgen.h>
#include <signal.h>
-int dht_link2 (xlator_t *this, call_frame_t *frame, int op_ret);
+static int
+dht_rmdir_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, gf_dirent_t *entries,
+ dict_t *xdata);
-int
-dht_aggregate_quota_xattr (dict_t *dst, char *key, data_t *value)
+static int
+dht_link2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret);
+
+static int
+dht_set_dir_xattr_req(xlator_t *this, loc_t *loc, dict_t *xattr_req);
+
+static int
+dht_lookup_everywhere_done(call_frame_t *frame, xlator_t *this);
+
+static int
+dht_common_mark_mdsxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata);
+
+static int
+dht_rmdir_unlock(call_frame_t *frame, xlator_t *this);
+
+static const char *dht_dbg_vxattrs[] = {DHT_DBG_HASHED_SUBVOL_PATTERN, NULL};
+
+/* Check the xdata to make sure EBADF has been set by client xlator */
+int32_t
+dht_check_remote_fd_failed_error(dht_local_t *local, int op_ret, int op_errno)
{
- int ret = -1;
- quota_meta_t *meta_dst = NULL;
- quota_meta_t *meta_src = NULL;
- int64_t *size = NULL;
- int64_t dst_dir_count = 0;
- int64_t src_dir_count = 0;
+ if (op_ret == -1 && (op_errno == EBADF || op_errno == EBADFD) &&
+ !(local->fd_checked)) {
+ return 1;
+ }
+ return 0;
+}
- if (value == NULL) {
- gf_log ("dht", GF_LOG_WARNING, "data value is NULL");
- ret = -1;
- goto out;
- }
+/* Sets the blocks and size values to fixed values. This is to be called
+ * only for dirs. The caller is responsible for checking the type
+ */
+int32_t
+dht_set_fixed_dir_stat(struct iatt *stat)
+{
+ if (stat) {
+ stat->ia_blocks = DHT_DIR_STAT_BLOCKS;
+ stat->ia_size = DHT_DIR_STAT_SIZE;
+ return 0;
+ }
+ return -1;
+}
+
+/* Return true if key exists in array
+ */
+static gf_boolean_t
+dht_match_xattr(const char *key)
+{
+ char **xattrs_to_heal = get_xattrs_to_heal();
+
+ return gf_get_index_by_elem(xattrs_to_heal, (char *)key) >= 0;
+}
- ret = dict_get_bin (dst, key, (void **)&meta_dst);
+static int
+dht_aggregate_quota_xattr(dict_t *dst, char *key, data_t *value)
+{
+ int ret = -1;
+ quota_meta_t *meta_dst = NULL;
+ quota_meta_t *meta_src = NULL;
+ int64_t *size = NULL;
+ int64_t dst_dir_count = 0;
+ int64_t src_dir_count = 0;
+
+ if (value == NULL) {
+ gf_msg("dht", GF_LOG_WARNING, 0, DHT_MSG_DATA_NULL,
+ "data value is NULL");
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_get_bin(dst, key, (void **)&meta_dst);
+ if (ret < 0) {
+ meta_dst = GF_CALLOC(1, sizeof(quota_meta_t), gf_common_quota_meta_t);
+ if (meta_dst == NULL) {
+ gf_msg("dht", GF_LOG_WARNING, ENOMEM, DHT_MSG_NO_MEMORY,
+ "Memory allocation failed");
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_bin(dst, key, meta_dst, sizeof(quota_meta_t));
if (ret < 0) {
- meta_dst = GF_CALLOC (1, sizeof (quota_meta_t),
- gf_common_quota_meta_t);
- if (meta_dst == NULL) {
- gf_msg ("dht", GF_LOG_WARNING, 0, DHT_MSG_NO_MEMORY,
- "Memory allocation failed");
- ret = -1;
- goto out;
- }
- ret = dict_set_bin (dst, key, meta_dst,
- sizeof (quota_meta_t));
- if (ret < 0) {
- gf_log ("dht", GF_LOG_WARNING,
- "dht aggregate dict set failed");
- GF_FREE (meta_dst);
- ret = -1;
- goto out;
- }
+ gf_msg("dht", GF_LOG_WARNING, EINVAL, DHT_MSG_DICT_SET_FAILED,
+ "dht aggregate dict set failed");
+ GF_FREE(meta_dst);
+ ret = -1;
+ goto out;
}
+ }
- if (value->len > sizeof (int64_t)) {
- meta_src = data_to_bin (value);
+ if (value->len > sizeof(int64_t)) {
+ meta_src = data_to_bin(value);
- meta_dst->size = hton64 (ntoh64 (meta_dst->size) +
- ntoh64 (meta_src->size));
- meta_dst->file_count = hton64 (ntoh64 (meta_dst->file_count) +
- ntoh64 (meta_src->file_count));
+ meta_dst->size = hton64(ntoh64(meta_dst->size) +
+ ntoh64(meta_src->size));
+ meta_dst->file_count = hton64(ntoh64(meta_dst->file_count) +
+ ntoh64(meta_src->file_count));
- if (value->len > (2 * sizeof (int64_t))) {
- dst_dir_count = ntoh64 (meta_dst->dir_count);
- src_dir_count = ntoh64 (meta_src->dir_count);
+ if (value->len > (2 * sizeof(int64_t))) {
+ dst_dir_count = ntoh64(meta_dst->dir_count);
+ src_dir_count = ntoh64(meta_src->dir_count);
- if (src_dir_count > dst_dir_count)
- meta_dst->dir_count = meta_src->dir_count;
- } else {
- meta_dst->dir_count = 0;
- }
+ if (src_dir_count > dst_dir_count)
+ meta_dst->dir_count = meta_src->dir_count;
} else {
- size = data_to_bin (value);
- meta_dst->size = hton64 (ntoh64 (meta_dst->size) +
- ntoh64 (*size));
+ meta_dst->dir_count = 0;
}
+ } else {
+ size = data_to_bin(value);
+ meta_dst->size = hton64(ntoh64(meta_dst->size) + ntoh64(*size));
+ }
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
}
-int
-dht_aggregate (dict_t *this, char *key, data_t *value, void *data)
+static int
+add_opt(char **optsp, const char *opt)
{
- dict_t *dst = NULL;
- int32_t ret = -1;
- data_t *dict_data = NULL;
-
- dst = data;
+ char *newopts = NULL;
+ unsigned oldsize = 0;
+ unsigned newsize = 0;
+
+ if (*optsp == NULL)
+ newopts = gf_strdup(opt);
+ else {
+ oldsize = strlen(*optsp);
+ newsize = oldsize + 1 + strlen(opt) + 1;
+ newopts = GF_REALLOC(*optsp, newsize);
+ if (newopts)
+ sprintf(newopts + oldsize, ",%s", opt);
+ }
+ if (newopts == NULL) {
+ gf_msg("dht", GF_LOG_WARNING, 0, DHT_MSG_NO_MEMORY,
+ "Error to add choices in buffer in add_opt");
+ return -1;
+ }
+ *optsp = newopts;
+ return 0;
+}
- if (strcmp (key, QUOTA_SIZE_KEY) == 0) {
- ret = dht_aggregate_quota_xattr (dst, key, value);
- if (ret) {
- gf_log ("dht", GF_LOG_WARNING, "Failed to "
- "aggregate qutoa xattr");
- goto out;
- }
- } else if (fnmatch (GF_XATTR_STIME_PATTERN, key, FNM_NOESCAPE) == 0) {
- ret = gf_get_min_stime (THIS, dst, key, value);
- if (ret < 0)
- goto out;
- } else {
- /* compare user xattrs only */
- if (!strncmp (key, "user.", strlen ("user."))) {
- ret = dict_lookup (dst, key, &dict_data);
- if (!ret && dict_data && value) {
- ret = is_data_equal (dict_data, value);
- if (!ret)
- gf_msg_debug ("dht", 0,
- "xattr mismatch for %s",
- key);
- }
- }
- ret = dict_set (dst, key, value);
- if (ret) {
- gf_msg ("dht", GF_LOG_WARNING, 0,
- DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value: key = %s",
- key);
- }
- }
+/* Return Choice list from Split brain status */
+static char *
+getChoices(const char *value)
+{
+ int i = 0;
+ char *ptr = NULL;
+ char *tok = NULL;
+ char *result = NULL;
+ char *newval = NULL;
+
+ ptr = strstr(value, "Choices:");
+ if (!ptr) {
+ result = ptr;
+ goto out;
+ }
+
+ newval = gf_strdup(ptr);
+ if (!newval) {
+ result = newval;
+ goto out;
+ }
+
+ tok = strtok(newval, ":");
+ if (!tok) {
+ result = tok;
+ goto out;
+ }
+
+ while (tok) {
+ i++;
+ if (i == 2)
+ break;
+ tok = strtok(NULL, ":");
+ }
+
+ result = gf_strdup(tok);
- ret = 0;
out:
- return ret;
+ if (newval)
+ GF_FREE(newval);
+
+ return result;
}
+/* This function prepare a list of choices for key
+ (replica.split-brain-status) in case of metadata split brain
+ only on the basis of key-value passed to this function.
+ After prepare the list of choices it update the same key in dict
+ with this value to reflect the same in
+ replica.split-brain-status attr for file.
-void
-dht_aggregate_xattr (dict_t *dst, dict_t *src)
+*/
+
+static int
+dht_aggregate_split_brain_xattr(dict_t *dst, char *key, data_t *value)
{
- if ((dst == NULL) || (src == NULL)) {
+ int ret = 0;
+ char *oldvalue = NULL;
+ char *old_choice = NULL;
+ char *new_choice = NULL;
+ char *full_choice = NULL;
+ char *status = NULL;
+
+ if (value == NULL) {
+ gf_msg("dht", GF_LOG_WARNING, 0, DHT_MSG_DATA_NULL,
+ "GF_AFR_SBRAIN_STATUS value is NULL");
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_get_str(dst, key, &oldvalue);
+ if (ret)
+ goto out;
+
+ /* skip code that is irrelevant if !oldvalue */
+ if (!oldvalue)
+ goto out;
+
+ if (strstr(oldvalue, "not")) {
+ gf_msg_debug("dht", 0, "Need to update split-brain status in dict");
+ ret = -1;
+ goto out;
+ }
+ if (strstr(oldvalue, "metadata-split-brain:yes") &&
+ (strstr(oldvalue, "data-split-brain:no"))) {
+ if (strstr(value->data, "not")) {
+ gf_msg_debug("dht", 0, "No need to update split-brain status");
+ ret = 0;
+ goto out;
+ }
+ if (strstr(value->data, "yes") &&
+ (strncmp(oldvalue, value->data, strlen(oldvalue)))) {
+ old_choice = getChoices(oldvalue);
+ if (!old_choice) {
+ gf_msg("dht", GF_LOG_WARNING, 0, DHT_MSG_NO_MEMORY,
+ "Error to get choices");
+ ret = -1;
+ goto out;
+ }
+
+ ret = add_opt(&full_choice, old_choice);
+ if (ret) {
+ gf_msg("dht", GF_LOG_WARNING, 0, DHT_MSG_NO_MEMORY,
+ "Error to add choices");
+ ret = -1;
goto out;
+ }
+
+ new_choice = getChoices(value->data);
+ if (!new_choice) {
+ gf_msg("dht", GF_LOG_WARNING, 0, DHT_MSG_NO_MEMORY,
+ "Error to get choices");
+ ret = -1;
+ goto out;
+ }
+
+ ret = add_opt(&full_choice, new_choice);
+ if (ret) {
+ gf_msg("dht", GF_LOG_WARNING, 0, DHT_MSG_NO_MEMORY,
+ "Error to add choices ");
+ ret = -1;
+ goto out;
+ }
+ ret = gf_asprintf(&status,
+ "data-split-brain:%s "
+ "metadata-split-brain:%s Choices:%s",
+ "no", "yes", full_choice);
+
+ if (-1 == ret) {
+ gf_msg("dht", GF_LOG_WARNING, 0, DHT_MSG_NO_MEMORY,
+ "Error to prepare status ");
+ goto out;
+ }
+ ret = dict_set_dynstr(dst, key, status);
+ if (ret) {
+ gf_msg("dht", GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set full choice");
+ }
}
+ }
- dict_foreach (src, dht_aggregate, dst);
out:
- return;
+ if (old_choice)
+ GF_FREE(old_choice);
+ if (new_choice)
+ GF_FREE(new_choice);
+ if (full_choice)
+ GF_FREE(full_choice);
+
+ return ret;
+}
+
+static int
+dht_aggregate(dict_t *this, char *key, data_t *value, void *data)
+{
+ dict_t *dst = NULL;
+ int32_t ret = -1;
+ data_t *dict_data = NULL;
+
+ dst = data;
+
+ /* compare split brain xattr only */
+ if (strcmp(key, GF_AFR_SBRAIN_STATUS) == 0) {
+ ret = dht_aggregate_split_brain_xattr(dst, key, value);
+ if (!ret)
+ goto out;
+ } else if (strcmp(key, QUOTA_SIZE_KEY) == 0) {
+ ret = dht_aggregate_quota_xattr(dst, key, value);
+ if (ret) {
+ gf_msg("dht", GF_LOG_WARNING, 0,
+ DHT_MSG_AGGREGATE_QUOTA_XATTR_FAILED,
+ "Failed to aggregate quota xattr");
+ }
+ goto out;
+ } else if (fnmatch(GF_XATTR_STIME_PATTERN, key, FNM_NOESCAPE) == 0) {
+ ret = gf_get_min_stime(THIS, dst, key, value);
+ goto out;
+ } else {
+ /* compare user xattrs only */
+ if (!strncmp(key, "user.", SLEN("user."))) {
+ ret = dict_lookup(dst, key, &dict_data);
+ if (!ret && dict_data && value) {
+ ret = is_data_equal(dict_data, value);
+ if (!ret)
+ gf_msg_debug("dht", 0, "xattr mismatch for %s", key);
+ }
+ }
+ }
+
+ ret = dict_set(dst, key, value);
+ if (ret) {
+ gf_msg("dht", GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value: key = %s", key);
+ }
+
+out:
+ return ret;
+}
+
+static void
+dht_aggregate_xattr(dict_t *dst, dict_t *src)
+{
+ if ((dst == NULL) || (src == NULL)) {
+ goto out;
+ }
+
+ dict_foreach(src, dht_aggregate, dst);
+out:
+ return;
+}
+
+/* Code to save hashed subvol on inode ctx as a mds subvol
+ */
+int
+dht_inode_ctx_mdsvol_set(inode_t *inode, xlator_t *this, xlator_t *mds_subvol)
+{
+ dht_inode_ctx_t *ctx = NULL;
+ int ret = -1;
+ uint64_t ctx_int = 0;
+ gf_boolean_t ctx_free = _gf_false;
+
+ LOCK(&inode->lock);
+ {
+ ret = __inode_ctx_get(inode, this, &ctx_int);
+ if (ctx_int) {
+ ctx = (dht_inode_ctx_t *)(uintptr_t)ctx_int;
+ ctx->mds_subvol = mds_subvol;
+ } else {
+ ctx = GF_CALLOC(1, sizeof(*ctx), gf_dht_mt_inode_ctx_t);
+ if (!ctx)
+ goto unlock;
+ ctx->mds_subvol = mds_subvol;
+ ctx_free = _gf_true;
+ ctx_int = (long)ctx;
+ ret = __inode_ctx_set(inode, this, &ctx_int);
+ }
+ }
+unlock:
+ UNLOCK(&inode->lock);
+ if (ret && ctx_free)
+ GF_FREE(ctx);
+ return ret;
+}
+
+/*Code to get mds subvol from inode ctx */
+
+int
+dht_inode_ctx_mdsvol_get(inode_t *inode, xlator_t *this, xlator_t **mdsvol)
+{
+ dht_inode_ctx_t *ctx = NULL;
+ int ret = -1;
+
+ if (!mdsvol)
+ return ret;
+
+ if (__is_root_gfid(inode->gfid)) {
+ (*mdsvol) = FIRST_CHILD(this);
+ return 0;
+ }
+
+ ret = dht_inode_ctx_get(inode, this, &ctx);
+
+ if (!ret && ctx) {
+ if (ctx->mds_subvol) {
+ *mdsvol = ctx->mds_subvol;
+ ret = 0;
+ } else {
+ ret = -1;
+ }
+ }
+
+ return ret;
}
/* TODO:
@@ -162,907 +450,1851 @@ out:
- complete linkfile selfheal
*/
-
-int
-dht_lookup_selfheal_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int op_ret, int op_errno, dict_t *xdata)
+static int
+dht_lookup_selfheal_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
{
- dht_local_t *local = NULL;
- dht_layout_t *layout = NULL;
- int ret = -1;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
- GF_VALIDATE_OR_GOTO ("dht", frame, out);
- GF_VALIDATE_OR_GOTO ("dht", this, out);
- GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO("dht", frame, out);
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, out);
- local = frame->local;
- ret = op_ret;
+ local = frame->local;
+ conf = this->private;
+ ret = op_ret;
- FRAME_SU_UNDO (frame, dht_local_t);
+ FRAME_SU_UNDO(frame, dht_local_t);
- if (ret == 0) {
- layout = local->selfheal.layout;
- ret = dht_layout_set (this, local->inode, layout);
- }
+ if (ret == 0) {
+ layout = local->selfheal.layout;
+ ret = dht_layout_set(this, local->inode, layout);
+ }
- if (local->loc.parent) {
- dht_inode_ctx_time_update (local->loc.parent, this,
- &local->postparent, 1);
- }
+ dht_inode_ctx_time_update(local->inode, this, &local->stbuf, 1);
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update(local->loc.parent, this, &local->postparent,
+ 1);
+ }
- DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
+ DHT_STRIP_PHASE1_FLAGS(&local->stbuf);
+ dht_set_fixed_dir_stat(&local->postparent);
+ /* Delete mds xattr at the time of STACK UNWIND */
+ GF_REMOVE_INTERNAL_XATTR(conf->mds_xattr_key, local->xattr);
- DHT_STACK_UNWIND (lookup, frame, ret, local->op_errno, local->inode,
- &local->stbuf, local->xattr, &local->postparent);
+ DHT_STACK_UNWIND(lookup, frame, ret, local->op_errno, local->inode,
+ &local->stbuf, local->xattr, &local->postparent);
out:
- return ret;
+ return ret;
}
-
-int
-dht_discover_complete (xlator_t *this, call_frame_t *discover_frame)
+static int
+dht_discover_complete(xlator_t *this, call_frame_t *discover_frame)
{
- dht_local_t *local = NULL;
- call_frame_t *main_frame = NULL;
- int op_errno = 0;
- int ret = -1;
- dht_layout_t *layout = NULL;
- dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ dht_local_t *heal_local = NULL;
+ call_frame_t *main_frame = NULL;
+ call_frame_t *heal_frame = NULL;
+ int op_errno = 0;
+ int ret = -1;
+ dht_layout_t *layout = NULL;
+ dht_conf_t *conf = NULL;
+ uint32_t vol_commit_hash = 0;
+ xlator_t *source = NULL;
+ int heal_path = 0;
+ int error_while_marking_mds = 0;
+ int i = 0;
+ loc_t loc = {0};
+ int8_t is_read_only = 0, layout_anomalies = 0;
+ char gfid_local[GF_UUID_BUF_SIZE] = {0};
+
+ local = discover_frame->local;
+ layout = local->layout;
+ conf = this->private;
+ gf_uuid_unparse(local->gfid, gfid_local);
+
+ LOCK(&discover_frame->lock);
+ {
+ main_frame = local->main_frame;
+ local->main_frame = NULL;
+ }
+ UNLOCK(&discover_frame->lock);
- local = discover_frame->local;
- layout = local->layout;
- conf = this->private;
+ if (!main_frame)
+ return 0;
- LOCK(&discover_frame->lock);
- {
- main_frame = local->main_frame;
- local->main_frame = NULL;
+ /* Code to update all extended attributed from
+ subvol to local->xattr on that internal xattr has found
+ */
+ if (conf->subvolume_cnt == 1)
+ local->need_xattr_heal = 0;
+ if (local->need_xattr_heal && (local->mds_xattr)) {
+ dht_dir_set_heal_xattr(this, local, local->xattr, local->mds_xattr,
+ NULL, NULL);
+ dict_unref(local->mds_xattr);
+ local->mds_xattr = NULL;
+ }
+
+ ret = dict_get_int8(local->xattr_req, QUOTA_READ_ONLY_KEY, &is_read_only);
+ if (ret < 0)
+ gf_msg_debug(this->name, 0, "key = %s not present in dict",
+ QUOTA_READ_ONLY_KEY);
+
+ if (local->file_count && local->dir_count) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_FILE_TYPE_MISMATCH,
+ "path %s exists as a file on one subvolume "
+ "and directory on another. "
+ "Please fix it manually",
+ local->loc.path);
+ op_errno = EIO;
+ goto out;
+ }
+
+ if (local->cached_subvol) {
+ ret = dht_layout_preset(this, local->cached_subvol, local->inode);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_SET_FAILED,
+ "failed to set layout for subvolume %s",
+ local->cached_subvol ? local->cached_subvol->name : "<nil>");
+ op_errno = EINVAL;
+ goto out;
+ }
+ } else {
+ ret = dht_layout_normalize(this, &local->loc, layout);
+ if ((ret < 0) || ((ret > 0) && (local->op_ret != 0))) {
+ /* either the layout is incorrect or the directory is
+ * not found even in one subvolume.
+ */
+ gf_msg_debug(this->name, 0,
+ "normalizing failed on %s "
+ "(overlaps/holes present: %s, "
+ "ENOENT errors: %d)",
+ local->loc.path, (ret < 0) ? "yes" : "no",
+ (ret > 0) ? ret : 0);
+ layout_anomalies = 1;
+ } else if (local->inode) {
+ dht_layout_set(this, local->inode, layout);
+ }
+ }
+
+ if (!conf->vch_forced) {
+ ret = dict_get_uint32(local->xattr, conf->commithash_xattr_name,
+ &vol_commit_hash);
+ if (ret == 0) {
+ conf->vol_commit_hash = vol_commit_hash;
}
- UNLOCK(&discover_frame->lock);
-
- if (!main_frame)
- return 0;
+ }
- if (local->file_count && local->dir_count) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_FILE_TYPE_MISMATCH,
- "path %s exists as a file on one subvolume "
- "and directory on another. "
- "Please fix it manually",
- local->loc.path);
- op_errno = EIO;
- goto out;
+ if (IA_ISDIR(local->stbuf.ia_type) && !is_read_only) {
+ for (i = 0; i < layout->cnt; i++) {
+ if (!source && !layout->list[i].err)
+ source = layout->list[i].xlator;
+ if (layout->list[i].err == ENOENT ||
+ layout->list[i].err == ESTALE) {
+ heal_path = 1;
+ }
+
+ if (source && heal_path)
+ break;
}
+ }
- if (local->cached_subvol) {
- ret = dht_layout_preset (this, local->cached_subvol,
- local->inode);
- if (ret < 0) {
- gf_msg (this->name, GF_LOG_WARNING, 0,
- DHT_MSG_LAYOUT_SET_FAILED,
- "failed to set layout for subvolume %s",
- local->cached_subvol ? local->cached_subvol->name : "<nil>");
- op_errno = EINVAL;
- goto out;
- }
- } else {
- ret = dht_layout_normalize (this, &local->loc, layout);
- if ((ret < 0) || ((ret > 0) && (local->op_ret != 0))) {
- /* either the layout is incorrect or the directory is
- * not found even in one subvolume.
- */
- gf_msg_debug (this->name, 0,
- "normalizing failed on %s "
- "(overlaps/holes present: %s, "
- "ENOENT errors: %d)", local->loc.path,
- (ret < 0) ? "yes" : "no", (ret > 0) ? ret : 0);
- if ((ret > 0) && (ret == conf->subvolume_cnt)) {
- op_errno = ESTALE;
- goto out;
- }
+ if (IA_ISDIR(local->stbuf.ia_type)) {
+ /* Call function to save hashed subvol on inode ctx if
+ internal mds xattr is not present and all subvols are up
+ */
+ if (!local->op_ret && !__is_root_gfid(local->stbuf.ia_gfid))
+ (void)dht_common_mark_mdsxattr(discover_frame,
+ &error_while_marking_mds, 1);
+
+ if (local->need_xattr_heal && !heal_path) {
+ local->need_xattr_heal = 0;
+ ret = dht_dir_xattr_heal(this, local, &op_errno);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_DIR_XATTR_HEAL_FAILED,
+ "xattr heal failed for "
+ "directory gfid is %s ",
+ gfid_local);
+ }
+ }
+ }
+
+ if (source && (heal_path || layout_anomalies || error_while_marking_mds)) {
+ gf_uuid_copy(loc.gfid, local->gfid);
+ if (gf_uuid_is_null(loc.gfid)) {
+ goto done;
+ }
+
+ if (local->inode)
+ loc.inode = inode_ref(local->inode);
+ else
+ goto done;
+
+ heal_frame = create_frame(this, this->ctx->pool);
+ if (heal_frame) {
+ heal_local = dht_local_init(heal_frame, &loc, NULL, 0);
+ if (!heal_local)
+ goto cleanup;
+
+ gf_uuid_copy(heal_local->gfid, local->gfid);
+ heal_frame->cookie = source;
+ heal_local->xattr = dict_ref(local->xattr);
+ heal_local->stbuf = local->stbuf;
+ heal_local->postparent = local->postparent;
+ heal_local->inode = inode_ref(loc.inode);
+ heal_local->main_frame = main_frame;
+ FRAME_SU_DO(heal_frame, dht_local_t);
+ ret = synctask_new(this->ctx->env, dht_heal_full_path,
+ dht_heal_full_path_done, heal_frame, heal_frame);
+ if (!ret) {
+ loc_wipe(&loc);
+ return 0;
+ }
+ /*
+ * Failed to spawn the synctask. Returning
+ * with out doing heal.
+ */
+ cleanup:
+ loc_wipe(&loc);
+ DHT_STACK_DESTROY(heal_frame);
+ }
+ }
+done:
+ dht_set_fixed_dir_stat(&local->postparent);
+ /* Delete mds xattr at the time of STACK UNWIND */
+ if (local->xattr)
+ GF_REMOVE_INTERNAL_XATTR(conf->mds_xattr_key, local->xattr);
- /* For fixing the directory layout, we need to choose
- * the subvolume on which layout will be set first.
- * Because in nameless lookup, we have gfid only,
- * we are dependent on gfid. Therefore if conf->
- * randomize_by_gfid is set, then only we proceed for
- * healing layout of directory otherwise we don't heal.
- */
+ DHT_STACK_UNWIND(lookup, main_frame, local->op_ret, local->op_errno,
+ local->inode, &local->stbuf, local->xattr,
+ &local->postparent);
+ return 0;
- if (local->inode && conf->randomize_by_gfid)
- goto selfheal;
- }
+out:
+ DHT_STACK_UNWIND(lookup, main_frame, -1, op_errno, NULL, NULL, NULL, NULL);
- if (local->inode)
- dht_layout_set (this, local->inode, layout);
- }
+ return ret;
+}
- DHT_STACK_UNWIND (lookup, main_frame, local->op_ret, local->op_errno,
- local->inode, &local->stbuf, local->xattr,
- &local->postparent);
- return 0;
+static int
+dht_common_mark_mdsxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ xlator_t *prev = cookie;
+ int ret = -1;
+ dht_conf_t *conf = 0;
+ dht_layout_t *layout = NULL;
+ int32_t mds_heal_fresh_lookup = 0;
+
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
+
+ local = frame->local;
+ conf = this->private;
+ layout = local->selfheal.layout;
+ mds_heal_fresh_lookup = local->mds_heal_fresh_lookup;
+
+ if (op_ret) {
+ gf_msg_debug(this->name, op_ret,
+ "Failed to set %s on the MDS %s for path %s. ",
+ conf->mds_xattr_key, prev->name, local->loc.path);
+ } else {
+ /* Save mds subvol on inode ctx */
+ ret = dht_inode_ctx_mdsvol_set(local->inode, this, prev);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SET_INODE_CTX_FAILED,
+ "Failed to set mds subvol on inode ctx"
+ " %s for %s ",
+ prev->name, local->loc.path);
+ }
+ }
+ if (!local->mds_heal_fresh_lookup && layout) {
+ dht_selfheal_dir_setattr(frame, &local->loc, &local->stbuf, 0xffffffff,
+ layout);
+ }
out:
- DHT_STACK_UNWIND (lookup, main_frame, -1, op_errno, NULL, NULL, NULL,
- NULL);
+ if (mds_heal_fresh_lookup)
+ DHT_STACK_DESTROY(frame);
+ return 0;
+}
- return ret;
+static xlator_t *
+dht_inode_get_hashed_subvol(inode_t *inode, xlator_t *this, loc_t *loc)
+{
+ char *path = NULL;
+ loc_t populate_loc = {
+ 0,
+ };
+ char *name = NULL;
+ xlator_t *hash_subvol = NULL;
+
+ if (!inode)
+ return hash_subvol;
+
+ if (loc && loc->parent && loc->path) {
+ if (!loc->name) {
+ name = strrchr(loc->path, '/');
+ if (name) {
+ loc->name = name + 1;
+ } else {
+ goto out;
+ }
+ }
+ hash_subvol = dht_subvol_get_hashed(this, loc);
+ goto out;
+ }
-selfheal:
+ if (!gf_uuid_is_null(inode->gfid)) {
+ populate_loc.inode = inode_ref(inode);
+ populate_loc.parent = inode_parent(populate_loc.inode, NULL, NULL);
+ inode_path(populate_loc.inode, NULL, &path);
- main_frame->local = local;
- discover_frame->local = NULL;
- FRAME_SU_DO (main_frame, dht_local_t);
- gf_uuid_copy (local->loc.gfid, local->gfid);
- ret = dht_selfheal_directory_for_nameless_lookup (main_frame,
- dht_lookup_selfheal_cbk,
- &local->loc, layout);
- return ret;
+ if (!path)
+ goto out;
+
+ populate_loc.path = path;
+ if (!populate_loc.name && populate_loc.path) {
+ name = strrchr(populate_loc.path, '/');
+ if (name) {
+ populate_loc.name = name + 1;
+ } else {
+ goto out;
+ }
+ }
+ hash_subvol = dht_subvol_get_hashed(this, &populate_loc);
+ }
+out:
+ if (populate_loc.inode)
+ loc_wipe(&populate_loc);
+ return hash_subvol;
}
+/* Common function call by revalidate/selfheal code path to populate
+ internal xattr if it is not present, mark_during_fresh_lookup value
+ determines either function is call by revalidate_cbk(discover_complete)
+ or call by selfheal code path while fresh lookup.
+ Here we do wind a call serially in case of fresh lookup and
+ for other lookup code path we do wind a call parallel.The reason
+ to wind a call serially is at the time of fresh lookup directory is not
+ discovered and at the time of revalidate_lookup directory is
+ already discovered. So, revalidate codepath can race with setxattr
+ codepath and can get into spurious heals because of an ongoing setxattr.
+ This can slow down revalidates, if healing happens in foreground.
+ However, if healing happens in background, there is no direct performance
+ penalty.
+*/
int
-dht_discover_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- inode_t *inode, struct iatt *stbuf, dict_t *xattr,
- struct iatt *postparent)
-{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
- dht_layout_t *layout = NULL;
- int ret = -1;
- int is_dir = 0;
- int is_linkfile = 0;
- int attempt_unwind = 0;
- dht_conf_t *conf = 0;
- char gfid_local[GF_UUID_BUF_SIZE] = {0};
- char gfid_node[GF_UUID_BUF_SIZE] = {0};
-
- GF_VALIDATE_OR_GOTO ("dht", frame, out);
- GF_VALIDATE_OR_GOTO ("dht", this, out);
- GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
- GF_VALIDATE_OR_GOTO ("dht", this->private, out);
- GF_VALIDATE_OR_GOTO ("dht", cookie, out);
-
- local = frame->local;
- prev = cookie;
- conf = this->private;
+dht_common_mark_mdsxattr(call_frame_t *frame, int *errst,
+ int mark_during_fresh_lookup)
+{
+ dht_local_t *local = NULL;
+ xlator_t *this = NULL;
+ xlator_t *hashed_subvol = NULL;
+ int ret = 0;
+ int i = 0;
+ dict_t *xattrs = NULL;
+ char gfid_local[GF_UUID_BUF_SIZE] = {
+ 0,
+ };
+ int32_t zero[1] = {0};
+ dht_conf_t *conf = 0;
+ dht_layout_t *layout = NULL;
+ dht_local_t *copy_local = NULL;
+ call_frame_t *xattr_frame = NULL;
+ gf_boolean_t vol_down = _gf_false;
+
+ GF_VALIDATE_OR_GOTO("dht", frame, out);
+ this = frame->this;
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ local = frame->local;
+ conf = this->private;
+ layout = local->selfheal.layout;
+ local->mds_heal_fresh_lookup = mark_during_fresh_lookup;
+
+ gf_uuid_unparse(local->gfid, gfid_local);
+
+ /* Code to update hashed subvol consider as a mds subvol
+ and wind a setxattr call on hashed subvol to update
+ internal xattr
+ */
+ if (!local->xattr || !dict_get(local->xattr, conf->mds_xattr_key)) {
+ /* It means no internal MDS xattr has been set yet
+ */
+ /* Check the status of all subvol are up while call
+ this function call by lookup code path
+ */
+ if (mark_during_fresh_lookup) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (!conf->subvolume_status[i]) {
+ vol_down = _gf_true;
+ break;
+ }
+ }
+ if (vol_down) {
+ gf_msg_debug(this->name, 0,
+ "subvol %s is down. Unable to "
+ " save mds subvol on inode for "
+ " path %s gfid is %s ",
+ conf->subvolumes[i]->name, local->loc.path,
+ gfid_local);
+ goto out;
+ }
+ }
- layout = local->layout;
+ /* Calculate hashed subvol based on inode and parent node
+ */
+ hashed_subvol = dht_inode_get_hashed_subvol(local->inode, this,
+ &local->loc);
+ if (!hashed_subvol) {
+ gf_msg(this->name, GF_LOG_DEBUG, 0,
+ DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ "Failed to get hashed subvol for path %s"
+ "gfid is %s ",
+ local->loc.path, gfid_local);
+ if (errst)
+ (*errst) = 1;
+ ret = -1;
+ goto out;
+ }
+ xattrs = dict_new();
+ if (!xattrs) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
+ "dict_new failed");
+ ret = -1;
+ goto out;
+ }
+ /* Add internal MDS xattr on disk for hashed subvol
+ */
+ ret = dht_dict_set_array(xattrs, conf->mds_xattr_key, zero, 1);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary"
+ " value:key = %s for "
+ "path %s",
+ conf->mds_xattr_key, local->loc.path);
+ ret = -1;
+ goto out;
+ }
+ /* Create a new frame to wind a call only while
+ this function call by revalidate_cbk code path
+ To wind a call parallel need to create a new frame
+ */
+ if (mark_during_fresh_lookup) {
+ xattr_frame = create_frame(this, this->ctx->pool);
+ if (!xattr_frame) {
+ ret = -1;
+ goto out;
+ }
+ copy_local = dht_local_init(xattr_frame, &(local->loc), NULL, 0);
+ if (!copy_local) {
+ ret = -1;
+ DHT_STACK_DESTROY(xattr_frame);
+ goto out;
+ }
+ copy_local->stbuf = local->stbuf;
+ copy_local->mds_heal_fresh_lookup = mark_during_fresh_lookup;
+ if (!copy_local->inode)
+ copy_local->inode = inode_ref(local->inode);
+ gf_uuid_copy(copy_local->loc.gfid, local->gfid);
+ FRAME_SU_DO(xattr_frame, dht_local_t);
+ STACK_WIND_COOKIE(xattr_frame, dht_common_mark_mdsxattr_cbk,
+ hashed_subvol, hashed_subvol,
+ hashed_subvol->fops->setxattr, &local->loc,
+ xattrs, 0, NULL);
+ } else {
+ STACK_WIND_COOKIE(frame, dht_common_mark_mdsxattr_cbk,
+ (void *)hashed_subvol, hashed_subvol,
+ hashed_subvol->fops->setxattr, &local->loc,
+ xattrs, 0, NULL);
+ }
+ } else {
+ gf_msg_debug(this->name, 0,
+ "internal xattr %s is present on subvol"
+ "on path %s gfid is %s ",
+ conf->mds_xattr_key, local->loc.path, gfid_local);
+ if (!mark_during_fresh_lookup)
+ dht_selfheal_dir_setattr(frame, &local->loc, &local->stbuf,
+ 0xffffffff, layout);
+ }
+out:
+ if (xattrs)
+ dict_unref(xattrs);
+ return ret;
+}
- /* Check if the gfid is different for file from other node */
- if (!op_ret && gf_uuid_compare (local->gfid, stbuf->ia_gfid)) {
+/* Get the value of key from dict in the bytewise and save in array after
+ convert from network byte order to host byte order
+*/
+static int32_t
+dht_dict_get_array(dict_t *dict, char *key, int32_t value[], int32_t size,
+ int *errst)
+{
+ void *ptr = NULL;
+ int32_t len = -1;
+ int32_t vindex = -1;
+ int32_t err = -1;
+ int ret = 0;
+
+ if (dict == NULL) {
+ (*errst) = -1;
+ return -EINVAL;
+ }
+ err = dict_get_ptr_and_len(dict, key, &ptr, &len);
+ if (err != 0) {
+ (*errst) = -1;
+ return err;
+ }
+
+ if (len != (size * sizeof(int32_t))) {
+ (*errst) = -1;
+ return -EINVAL;
+ }
+
+ for (vindex = 0; vindex < size; vindex++) {
+ value[vindex] = ntoh32(*((int32_t *)ptr + vindex));
+ if (value[vindex] < 0)
+ ret = -1;
+ }
+
+ return ret;
+}
- gf_uuid_unparse(stbuf->ia_gfid, gfid_node);
- gf_uuid_unparse(local->gfid, gfid_local);
+static int
+dht_discover_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, inode_t *inode, struct iatt *stbuf,
+ dict_t *xattr, struct iatt *postparent)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ xlator_t *prev = NULL;
+ dht_layout_t *layout = NULL;
+ int ret = -1;
+ int is_dir = 0;
+ int32_t check_mds = 0;
+ int is_linkfile = 0;
+ int attempt_unwind = 0;
+ dht_conf_t *conf = 0;
+ char gfid_local[GF_UUID_BUF_SIZE] = {0};
+ char gfid_node[GF_UUID_BUF_SIZE] = {0};
+ int32_t mds_xattr_val[1] = {0};
+ int errst = 0;
+
+ GF_VALIDATE_OR_GOTO("dht", frame, out);
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO("dht", this->private, out);
+ GF_VALIDATE_OR_GOTO("dht", cookie, out);
+
+ local = frame->local;
+ prev = cookie;
+ conf = this->private;
+
+ layout = local->layout;
+
+ /* Check if the gfid is different for file from other node */
+ if (!op_ret && gf_uuid_compare(local->gfid, stbuf->ia_gfid)) {
+ gf_uuid_unparse(stbuf->ia_gfid, gfid_node);
+ gf_uuid_unparse(local->gfid, gfid_local);
+
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_GFID_MISMATCH,
+ "%s: gfid different on %s, gfid local = %s"
+ "gfid other = %s",
+ local->loc.path, prev->name, gfid_local, gfid_node);
+ }
+
+ LOCK(&frame->lock);
+ {
+ /* TODO: assert equal mode on stbuf->st_mode and
+ local->stbuf->st_mode
+
+ else mkdir/chmod/chown and fix
+ */
- gf_msg (this->name, GF_LOG_WARNING, 0,
- DHT_MSG_GFID_MISMATCH,
- "%s: gfid different on %s, gfid local = %s"
- "gfid other = %s",
- local->loc.path, prev->this->name,
- gfid_local, gfid_node);
- }
+ ret = dht_layout_merge(this, layout, prev, op_ret, op_errno, xattr);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_MERGE_FAILED,
+ "%s: failed to merge layouts for subvol %s", local->loc.path,
+ prev->name);
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ gf_msg_debug(this->name, op_errno,
+ "lookup of %s on %s returned error", local->loc.path,
+ prev->name);
- LOCK (&frame->lock);
- {
- /* TODO: assert equal mode on stbuf->st_mode and
- local->stbuf->st_mode
+ goto unlock;
+ }
- else mkdir/chmod/chown and fix
- */
- ret = dht_layout_merge (this, layout, prev->this,
- op_ret, op_errno, xattr);
- if (ret)
- gf_msg (this->name, GF_LOG_WARNING, 0,
- DHT_MSG_LAYOUT_MERGE_FAILED,
- "%s: failed to merge layouts for subvol %s",
- local->loc.path, prev->this->name);
-
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_msg_debug (this->name, 0,
- "lookup of %s on %s returned error (%s)",
- local->loc.path, prev->this->name,
- strerror (op_errno));
-
- goto unlock;
- }
+ is_linkfile = check_is_linkfile(inode, stbuf, xattr,
+ conf->link_xattr_name);
+ is_dir = check_is_dir(inode, stbuf, xattr);
- is_linkfile = check_is_linkfile (inode, stbuf, xattr,
- conf->link_xattr_name);
- is_dir = check_is_dir (inode, stbuf, xattr);
+ if (is_dir) {
+ local->dir_count++;
+ } else {
+ local->file_count++;
+
+ if (!is_linkfile && !local->cached_subvol) {
+ /* real file */
+ /* Ok, we somehow managed to find a file on
+ * more than one subvol. ignore this or we
+ * will end up overwriting information while a
+ * a thread is potentially unwinding from
+ * dht_discover_complete
+ */
+ local->cached_subvol = prev;
+ attempt_unwind = 1;
+ } else {
+ goto unlock;
+ }
+ }
- if (is_dir) {
- local->dir_count ++;
- } else {
- local->file_count ++;
-
- if (!is_linkfile) {
- /* real file */
- local->cached_subvol = prev->this;
- attempt_unwind = 1;
- } else {
- goto unlock;
- }
- }
+ local->op_ret = 0;
- local->op_ret = 0;
+ if (local->xattr == NULL) {
+ local->xattr = dict_ref(xattr);
+ } else {
+ /* Don't aggregate for files. See BZ#1484709 */
+ if (is_dir)
+ dht_aggregate_xattr(local->xattr, xattr);
+ }
- if (local->xattr == NULL) {
- local->xattr = dict_ref (xattr);
- } else {
- dht_aggregate_xattr (local->xattr, xattr);
- }
+ if (local->inode == NULL)
+ local->inode = inode_ref(inode);
- if (local->inode == NULL)
- local->inode = inode_ref (inode);
+ dht_iatt_merge(this, &local->stbuf, stbuf);
+ dht_iatt_merge(this, &local->postparent, postparent);
- dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
- dht_iatt_merge (this, &local->postparent, postparent,
- prev->this);
- }
+ if (!dict_get(xattr, conf->mds_xattr_key)) {
+ goto unlock;
+ } else {
+ gf_msg_debug(this->name, 0,
+ "internal xattr %s is present on subvol"
+ "on path %s gfid is %s ",
+ conf->mds_xattr_key, local->loc.path, gfid_local);
+ }
+ check_mds = dht_dict_get_array(xattr, conf->mds_xattr_key,
+ mds_xattr_val, 1, &errst);
+ /* save mds subvol on inode ctx */
+ ret = dht_inode_ctx_mdsvol_set(local->inode, this, prev);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SET_INODE_CTX_FAILED,
+ "Failed to set hashed subvol for %s vol is %s",
+ local->loc.path, prev->name);
+ }
+
+ if ((check_mds < 0) && !errst) {
+ local->mds_xattr = dict_ref(xattr);
+ gf_msg_debug(this->name, 0,
+ "Value of %s is not zero on mds subvol"
+ "so xattr needs to be healed on non mds"
+ " path is %s and vol name is %s "
+ " gfid is %s",
+ conf->mds_xattr_key, local->loc.path, prev->name,
+ gfid_local);
+ local->need_xattr_heal = 1;
+ local->mds_subvol = prev;
+ }
+ }
unlock:
- UNLOCK (&frame->lock);
+ UNLOCK(&frame->lock);
out:
- /* Make sure, the thread executing dht_discover_complete is the one
- * which calls STACK_DESTROY (frame). In the case of "attempt_unwind",
- * this makes sure that the thread don't call dht_frame_return, till
- * call to dht_discover_complete is done.
- */
- if (attempt_unwind) {
- dht_discover_complete (this, frame);
- }
+ /* Make sure, the thread executing dht_discover_complete is the one
+ * which calls STACK_DESTROY (frame). In the case of "attempt_unwind",
+ * this makes sure that the thread don't call dht_frame_return, till
+ * call to dht_discover_complete is done.
+ */
+ if (attempt_unwind) {
+ dht_discover_complete(this, frame);
+ }
- this_call_cnt = dht_frame_return (frame);
+ this_call_cnt = dht_frame_return(frame);
- if (is_last_call (this_call_cnt) && !attempt_unwind) {
- dht_discover_complete (this, frame);
- }
+ if (is_last_call(this_call_cnt) && !attempt_unwind) {
+ dht_discover_complete(this, frame);
+ }
- if (is_last_call (this_call_cnt))
- DHT_STACK_DESTROY (frame);
+ if (is_last_call(this_call_cnt))
+ DHT_STACK_DESTROY(frame);
- return 0;
+ return 0;
}
+static int
+dht_set_file_xattr_req(xlator_t *this, loc_t *loc, dict_t *xattr_req)
+{
+ int ret = -EINVAL;
+ dht_conf_t *conf = NULL;
+
+ conf = this->private;
+ if (!conf) {
+ goto err;
+ }
+
+ if (!xattr_req) {
+ goto err;
+ }
+
+ /* Used to check whether this is a linkto file.
+ */
+ ret = dict_set_uint32(xattr_req, conf->link_xattr_name, 256);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value:key = %s for "
+ "path %s",
+ conf->link_xattr_name, loc->path);
+ goto err;
+ }
+
+ /* This is used to make sure we don't unlink linkto files
+ * which are the target of an ongoing file migration.
+ */
+ ret = dict_set_uint32(xattr_req, GLUSTERFS_OPEN_FD_COUNT, 4);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value:key = %s for "
+ "path %s",
+ GLUSTERFS_OPEN_FD_COUNT, loc->path);
+ goto err;
+ }
+
+ ret = 0;
+err:
+ return ret;
+}
-int
-dht_discover (call_frame_t *frame, xlator_t *this, loc_t *loc)
+/* This is a gfid based nameless lookup. Without a name, the hashed subvol
+ * cannot be calculated so a lookup is sent to all subvols.
+ */
+static int
+dht_do_discover(call_frame_t *frame, xlator_t *this, loc_t *loc)
{
- int ret;
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- int call_cnt = 0;
- int op_errno = EINVAL;
- int i = 0;
- call_frame_t *discover_frame = NULL;
+ int ret;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int call_cnt = 0;
+ int op_errno = EINVAL;
+ int i = 0;
+ call_frame_t *discover_frame = NULL;
+
+ conf = this->private;
+ local = frame->local;
+
+ /* As we do not know if this is a file or directory, request
+ * both file and directory xattrs
+ */
+ ret = dht_set_file_xattr_req(this, loc, local->xattr_req);
+ if (ret) {
+ goto err;
+ }
+
+ ret = dht_set_dir_xattr_req(this, loc, local->xattr_req);
+ if (ret) {
+ goto err;
+ }
+
+ if (loc_is_root(loc)) {
+ /* Request the DHT commit hash xattr (trusted.glusterfs.dht.commithash)
+ * set on the brick root.
+ */
+ ret = dict_set_uint32(local->xattr_req, conf->commithash_xattr_name,
+ sizeof(uint32_t));
+ }
- conf = this->private;
- local = frame->local;
+ call_cnt = conf->subvolume_cnt;
+ local->call_cnt = call_cnt;
- ret = dict_set_uint32 (local->xattr_req, conf->xattr_name, 4 * 4);
- if (ret)
- gf_msg (this->name, GF_LOG_WARNING, 0,
- DHT_MSG_DICT_SET_FAILED,
- "%s: Failed to set dictionary value:key = %s",
- loc->path, conf->xattr_name);
+ local->layout = dht_layout_new(this, conf->subvolume_cnt);
- ret = dict_set_uint32 (local->xattr_req, conf->link_xattr_name, 256);
- if (ret)
- gf_msg (this->name, GF_LOG_WARNING, 0,
- DHT_MSG_DICT_SET_FAILED,
- "%s: Failed to set dictionary value:key = %s",
- loc->path, conf->link_xattr_name);
+ if (!local->layout) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- call_cnt = conf->subvolume_cnt;
- local->call_cnt = call_cnt;
+ gf_uuid_copy(local->gfid, loc->gfid);
- local->layout = dht_layout_new (this, conf->subvolume_cnt);
+ discover_frame = copy_frame(frame);
+ if (!discover_frame) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- if (!local->layout) {
- op_errno = ENOMEM;
- goto err;
- }
+ discover_frame->local = local;
+ frame->local = NULL;
+ local->main_frame = frame;
- gf_uuid_copy (local->gfid, loc->gfid);
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND_COOKIE(discover_frame, dht_discover_cbk, conf->subvolumes[i],
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup, &local->loc,
+ local->xattr_req);
+ }
- discover_frame = copy_frame (frame);
- if (!discover_frame) {
- op_errno = ENOMEM;
- goto err;
- }
+ return 0;
- discover_frame->local = local;
- frame->local = NULL;
- local->main_frame = frame;
+err:
+ DHT_STACK_UNWIND(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
- for (i = 0; i < call_cnt; i++) {
- STACK_WIND (discover_frame, dht_discover_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->lookup,
- &local->loc, local->xattr_req);
- }
+ return 0;
+}
- return 0;
+/* Code to call syntask to heal custom xattr from hashed subvol
+ to non hashed subvol
+*/
+int
+dht_dir_xattr_heal(xlator_t *this, dht_local_t *local, int *op_errno)
+{
+ dht_local_t *copy_local = NULL;
+ call_frame_t *copy = NULL;
+ int ret = -1;
+ char gfid_local[GF_UUID_BUF_SIZE] = {0};
+
+ if (gf_uuid_is_null(local->gfid)) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DIR_XATTR_HEAL_FAILED,
+ "No gfid exists for path %s "
+ "so healing xattr is not possible",
+ local->loc.path);
+ *op_errno = EIO;
+ goto out;
+ }
+
+ gf_uuid_unparse(local->gfid, gfid_local);
+ copy = create_frame(this, this->ctx->pool);
+ if (copy) {
+ copy_local = dht_local_init(copy, &(local->loc), NULL, 0);
+ if (!copy_local) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM,
+ DHT_MSG_DIR_XATTR_HEAL_FAILED,
+ "Memory allocation failed "
+ "for path %s gfid %s ",
+ local->loc.path, gfid_local);
+ *op_errno = ENOMEM;
+ DHT_STACK_DESTROY(copy);
+ } else {
+ copy_local->stbuf = local->stbuf;
+ gf_uuid_copy(copy_local->loc.gfid, local->gfid);
+ copy_local->mds_subvol = local->mds_subvol;
+ FRAME_SU_DO(copy, dht_local_t);
+ ret = synctask_new(this->ctx->env, dht_dir_heal_xattrs,
+ dht_dir_heal_xattrs_done, copy, copy);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM,
+ DHT_MSG_DIR_XATTR_HEAL_FAILED,
+ "Synctask creation failed to heal xattr "
+ "for path %s gfid %s ",
+ local->loc.path, gfid_local);
+ *op_errno = ENOMEM;
+ DHT_STACK_DESTROY(copy);
+ }
+ }
+ }
+out:
+ return ret;
+}
-err:
- DHT_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL,
- NULL);
+static int
+dht_needs_selfheal(call_frame_t *frame, xlator_t *this)
+{
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int needs_selfheal = 0;
+ int ret = 0;
+
+ local = frame->local;
+ layout = local->layout;
+
+ if (local->need_attrheal || local->need_xattr_heal ||
+ local->need_selfheal) {
+ needs_selfheal = 1;
+ }
+
+ ret = dht_layout_normalize(this, &local->loc, layout);
+
+ if (ret != 0) {
+ gf_msg_debug(this->name, 0, "fixing assignment on %s", local->loc.path);
+ needs_selfheal = 1;
+ }
+ return needs_selfheal;
+}
+static int
+is_permission_different(ia_prot_t *prot1, ia_prot_t *prot2)
+{
+ if ((prot1->owner.read != prot2->owner.read) ||
+ (prot1->owner.write != prot2->owner.write) ||
+ (prot1->owner.exec != prot2->owner.exec) ||
+ (prot1->group.read != prot2->group.read) ||
+ (prot1->group.write != prot2->group.write) ||
+ (prot1->group.exec != prot2->group.exec) ||
+ (prot1->other.read != prot2->other.read) ||
+ (prot1->other.write != prot2->other.write) ||
+ (prot1->other.exec != prot2->other.exec) ||
+ (prot1->suid != prot2->suid) || (prot1->sgid != prot2->sgid) ||
+ (prot1->sticky != prot2->sticky)) {
+ return 1;
+ } else {
return 0;
+ }
}
-
int
-dht_lookup_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- inode_t *inode, struct iatt *stbuf, dict_t *xattr,
- struct iatt *postparent)
-{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
- dht_layout_t *layout = NULL;
- int ret = -1;
- int is_dir = 0;
- char gfid_local[GF_UUID_BUF_SIZE] = {0};
- char gfid_node[GF_UUID_BUF_SIZE] = {0};
-
- GF_VALIDATE_OR_GOTO ("dht", frame, out);
- GF_VALIDATE_OR_GOTO ("dht", this, out);
- GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
- GF_VALIDATE_OR_GOTO ("dht", this->private, out);
- GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf,
+ dict_t *xattr, struct iatt *postparent)
+{
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int this_call_cnt = 0;
+ xlator_t *prev = NULL;
+ dht_layout_t *layout = NULL;
+ int ret = -1;
+ int is_dir = 0;
+ int32_t check_mds = 0;
+ int errst = 0;
+ char gfid_local[GF_UUID_BUF_SIZE] = {0};
+ char gfid_node[GF_UUID_BUF_SIZE] = {0};
+ int32_t mds_xattr_val[1] = {0};
+
+ GF_VALIDATE_OR_GOTO("dht", frame, out);
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO("dht", this->private, out);
+ GF_VALIDATE_OR_GOTO("dht", cookie, out);
+
+ local = frame->local;
+ prev = cookie;
+ conf = this->private;
+
+ layout = local->layout;
+ gf_msg_debug(this->name, op_errno,
+ "%s: lookup on %s returned with op_ret = %d, op_errno = %d",
+ local->loc.path, prev->name, op_ret, op_errno);
+
+ /* The first successful lookup*/
+ if (!op_ret && gf_uuid_is_null(local->gfid)) {
+ memcpy(local->gfid, stbuf->ia_gfid, 16);
+ }
+ if (!gf_uuid_is_null(local->gfid)) {
+ gf_uuid_unparse(local->gfid, gfid_local);
+ }
+
+ /* Check if the gfid is different for file from other node */
+ if (!op_ret && gf_uuid_compare(local->gfid, stbuf->ia_gfid)) {
+ gf_uuid_unparse(stbuf->ia_gfid, gfid_node);
+
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_GFID_MISMATCH,
+ "%s: gfid different on %s."
+ " gfid local = %s, gfid subvol = %s",
+ local->loc.path, prev->name, gfid_local, gfid_node);
+ }
+
+ LOCK(&frame->lock);
+ {
+ /* TODO: assert equal mode on stbuf->st_mode and
+ local->stbuf->st_mode
+ else mkdir/chmod/chown and fix
+ */
+ ret = dht_layout_merge(this, layout, prev, op_ret, op_errno, xattr);
- local = frame->local;
- prev = cookie;
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
- layout = local->layout;
+ /* The GFID is missing on this subvol. Force a heal. */
+ if (op_errno == ENODATA) {
+ local->need_lookup_everywhere = 1;
+ }
+ goto unlock;
+ }
- if (!op_ret && gf_uuid_is_null (local->gfid))
- memcpy (local->gfid, stbuf->ia_gfid, 16);
+ is_dir = check_is_dir(inode, stbuf, xattr);
+ if (!is_dir) {
+ gf_msg_debug(this->name, 0,
+ "%s: lookup on %s returned non dir 0%o"
+ "calling lookup_everywhere",
+ local->loc.path, prev->name, stbuf->ia_type);
+ local->need_lookup_everywhere = 1;
+ goto unlock;
+ }
- /* Check if the gfid is different for file from other node */
- if (!op_ret && gf_uuid_compare (local->gfid, stbuf->ia_gfid)) {
+ local->op_ret = 0;
+ if (local->xattr == NULL) {
+ local->xattr = dict_ref(xattr);
+ } else {
+ dht_aggregate_xattr(local->xattr, xattr);
+ }
- gf_uuid_unparse(stbuf->ia_gfid, gfid_node);
- gf_uuid_unparse(local->gfid, gfid_local);
+ if (__is_root_gfid(stbuf->ia_gfid)) {
+ ret = dht_dir_has_layout(xattr, conf->xattr_name);
+ if (ret >= 0) {
+ if (is_greater_time(local->prebuf.ia_ctime,
+ local->prebuf.ia_ctime_nsec,
+ stbuf->ia_ctime, stbuf->ia_ctime_nsec)) {
+ /* Choose source */
+ local->prebuf.ia_gid = stbuf->ia_gid;
+ local->prebuf.ia_uid = stbuf->ia_uid;
- gf_msg (this->name, GF_LOG_WARNING, 0,
- DHT_MSG_GFID_MISMATCH,
- "%s: gfid different on %s."
- " gfid local = %s, gfid subvol = %s",
- local->loc.path, prev->this->name,
- gfid_local, gfid_node);
+ local->prebuf.ia_ctime = stbuf->ia_ctime;
+ local->prebuf.ia_ctime_nsec = stbuf->ia_ctime_nsec;
+ local->prebuf.ia_prot = stbuf->ia_prot;
+ }
+ }
}
- LOCK (&frame->lock);
- {
- /* TODO: assert equal mode on stbuf->st_mode and
- local->stbuf->st_mode
+ if (local->stbuf.ia_type != IA_INVAL) {
+ /* This is not the first subvol to respond
+ * Compare values to see if attrs need to be healed
+ */
+ if ((local->stbuf.ia_gid != stbuf->ia_gid) ||
+ (local->stbuf.ia_uid != stbuf->ia_uid) ||
+ (is_permission_different(&local->stbuf.ia_prot,
+ &stbuf->ia_prot))) {
+ local->need_attrheal = 1;
+ }
+ }
- else mkdir/chmod/chown and fix
- */
- ret = dht_layout_merge (this, layout, prev->this,
- op_ret, op_errno, xattr);
+ if (local->inode == NULL)
+ local->inode = inode_ref(inode);
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_msg_debug (this->name, 0,
- "lookup of %s on %s returned error (%s)",
- local->loc.path, prev->this->name,
- strerror (op_errno));
+ dht_iatt_merge(this, &local->stbuf, stbuf);
+ dht_iatt_merge(this, &local->postparent, postparent);
- goto unlock;
- }
+ if (!dict_get(xattr, conf->mds_xattr_key)) {
+ gf_msg_debug(this->name, 0,
+ "%s: mds xattr %s is not present "
+ "on %s(gfid = %s)",
+ local->loc.path, conf->mds_xattr_key, prev->name,
+ gfid_local);
+ goto unlock;
+ }
- is_dir = check_is_dir (inode, stbuf, xattr);
- if (!is_dir) {
+ /* Save the mds subvol info and stbuf. This is the value that will
+ * be used for healing
+ */
+ local->mds_subvol = prev;
+ local->mds_stbuf = *stbuf;
- gf_msg_debug (this->name, 0,
- "lookup of %s on %s returned non"
- "dir 0%o"
- "calling lookup_everywhere",
- local->loc.path, prev->this->name,
- stbuf->ia_type);
+ /* Save mds subvol on inode ctx */
- local->need_selfheal = 1;
- goto unlock;
- }
+ ret = dht_inode_ctx_mdsvol_set(local->inode, this, prev);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SET_INODE_CTX_FAILED,
+ "%s: Failed to set mds (%s)", local->loc.path, prev->name);
+ }
+ check_mds = dht_dict_get_array(xattr, conf->mds_xattr_key,
+ mds_xattr_val, 1, &errst);
+ if ((check_mds < 0) && !errst) {
+ /* Check if xattrs need to be healed on the directories */
+ local->mds_xattr = dict_ref(xattr);
+ gf_msg_debug(this->name, 0,
+ "%s: %s is not zero on %s. Xattrs need to be healed."
+ "(gfid = %s)",
+ local->loc.path, conf->mds_xattr_key, prev->name,
+ gfid_local);
+ local->need_xattr_heal = 1;
+ }
+ }
- local->op_ret = 0;
- if (local->xattr == NULL) {
- local->xattr = dict_ref (xattr);
- } else {
- dht_aggregate_xattr (local->xattr, xattr);
- }
+unlock:
+ UNLOCK(&frame->lock);
- if (local->inode == NULL)
- local->inode = inode_ref (inode);
+ this_call_cnt = dht_frame_return(frame);
- dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
- dht_iatt_merge (this, &local->postparent, postparent,
- prev->this);
+ if (is_last_call(this_call_cnt)) {
+ /* If the mds subvol is not set correctly*/
+ if (!__is_root_gfid(local->gfid) &&
+ (!dict_get(local->xattr, conf->mds_xattr_key))) {
+ local->need_selfheal = 1;
}
-unlock:
- UNLOCK (&frame->lock);
+ /* No need to call xattr heal code if volume count is 1
+ */
+ if (conf->subvolume_cnt == 1) {
+ local->need_xattr_heal = 0;
+ }
- this_call_cnt = dht_frame_return (frame);
+ if (local->need_selfheal || local->need_lookup_everywhere) {
+ /* Set the gfid-req so posix will set the GFID*/
+ if (!gf_uuid_is_null(local->gfid)) {
+ /* Ok, this should _never_ happen */
+ ret = dict_set_static_bin(local->xattr_req, "gfid-req",
+ local->gfid, 16);
+ } else {
+ if (!gf_uuid_is_null(local->gfid_req))
+ ret = dict_set_static_bin(local->xattr_req, "gfid-req",
+ local->gfid_req, 16);
+ }
+ }
- if (is_last_call (this_call_cnt)) {
- if (local->need_selfheal) {
- local->need_selfheal = 0;
- dht_lookup_everywhere (frame, this, &local->loc);
- return 0;
- }
+ if (local->need_lookup_everywhere) {
+ local->need_lookup_everywhere = 0;
+ dht_lookup_everywhere(frame, this, &local->loc);
+ return 0;
+ }
- if (local->op_ret == 0) {
- ret = dht_layout_normalize (this, &local->loc, layout);
+ if (local->op_ret == 0) {
+ if (dht_needs_selfheal(frame, this)) {
+ goto selfheal;
+ }
- if (ret != 0) {
- gf_msg_debug (this->name, 0,
- "fixing assignment on %s",
- local->loc.path);
- goto selfheal;
- }
+ dht_layout_set(this, local->inode, layout);
+ if (local->inode) {
+ dht_inode_ctx_time_update(local->inode, this, &local->stbuf, 1);
+ }
- dht_layout_set (this, local->inode, layout);
- }
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update(local->loc.parent, this,
+ &local->postparent, 1);
+ }
+ }
- if (local->loc.parent) {
- dht_inode_ctx_time_update (local->loc.parent, this,
- &local->postparent, 1);
- }
+ DHT_STRIP_PHASE1_FLAGS(&local->stbuf);
+ dht_set_fixed_dir_stat(&local->postparent);
+ /* Delete mds xattr at the time of STACK UNWIND */
+ if (local->xattr)
+ GF_REMOVE_INTERNAL_XATTR(conf->mds_xattr_key, local->xattr);
- DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
- DHT_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
- local->inode, &local->stbuf, local->xattr,
- &local->postparent);
- }
+ DHT_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno,
+ local->inode, &local->stbuf, local->xattr,
+ &local->postparent);
+ }
- return 0;
+ return 0;
selfheal:
- FRAME_SU_DO (frame, dht_local_t);
- gf_uuid_copy (local->loc.gfid, local->gfid);
- ret = dht_selfheal_directory (frame, dht_lookup_selfheal_cbk,
- &local->loc, layout);
+ FRAME_SU_DO(frame, dht_local_t);
+ ret = dht_selfheal_directory(frame, dht_lookup_selfheal_cbk, &local->loc,
+ layout);
out:
- return ret;
+ return ret;
+}
+
+static int
+dht_lookup_directory(call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+ int call_cnt = 0;
+ int i = 0;
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ int ret = 0;
+
+ GF_VALIDATE_OR_GOTO("dht", frame, out);
+ GF_VALIDATE_OR_GOTO("dht", this, unwind);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, unwind);
+ GF_VALIDATE_OR_GOTO("dht", this->private, unwind);
+ GF_VALIDATE_OR_GOTO("dht", loc, unwind);
+
+ conf = this->private;
+ local = frame->local;
+
+ call_cnt = conf->subvolume_cnt;
+ local->call_cnt = call_cnt;
+
+ local->layout = dht_layout_new(this, conf->subvolume_cnt);
+ if (!local->layout) {
+ goto unwind;
+ }
+
+ if (local->xattr != NULL) {
+ dict_unref(local->xattr);
+ local->xattr = NULL;
+ }
+
+ if (!gf_uuid_is_null(local->gfid)) {
+ /* use this gfid in order to heal any missing ones */
+ ret = dict_set_gfuuid(local->xattr_req, "gfid-req", local->gfid, true);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "%s: Failed to set dictionary value:"
+ " key = gfid-req",
+ local->loc.path);
+ }
+
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND_COOKIE(
+ frame, dht_lookup_dir_cbk, conf->subvolumes[i], conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup, &local->loc, local->xattr_req);
+ }
+ return 0;
+unwind:
+ DHT_STACK_UNWIND(lookup, frame, -1, ENOMEM, NULL, NULL, NULL, NULL);
+out:
+ return 0;
}
int
-dht_revalidate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- inode_t *inode, struct iatt *stbuf, dict_t *xattr,
- struct iatt *postparent)
-{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
- dht_layout_t *layout = NULL;
- dht_conf_t *conf = NULL;
- int ret = -1;
- int is_dir = 0;
- int is_linkfile = 0;
- call_frame_t *copy = NULL;
- dht_local_t *copy_local = NULL;
- char gfid[GF_UUID_BUF_SIZE] = {0};
-
- GF_VALIDATE_OR_GOTO ("dht", frame, err);
- GF_VALIDATE_OR_GOTO ("dht", this, err);
- GF_VALIDATE_OR_GOTO ("dht", frame->local, err);
- GF_VALIDATE_OR_GOTO ("dht", cookie, err);
+dht_revalidate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf,
+ dict_t *xattr, struct iatt *postparent)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ xlator_t *prev = NULL;
+ dht_layout_t *layout = NULL;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
+ int is_dir = 0;
+ int is_linkfile = 0;
+ int follow_link = 0;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+ uint32_t vol_commit_hash = 0;
+ xlator_t *subvol = NULL;
+ int32_t check_mds = 0;
+ int errst = 0, i = 0;
+ int32_t mds_xattr_val[1] = {0};
+
+ GF_VALIDATE_OR_GOTO("dht", frame, err);
+ GF_VALIDATE_OR_GOTO("dht", this, err);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, err);
+ GF_VALIDATE_OR_GOTO("dht", cookie, err);
+ GF_VALIDATE_OR_GOTO("dht", this->private, err);
+
+ local = frame->local;
+ prev = cookie;
+ conf = this->private;
+
+ if (!conf->vch_forced) {
+ /* Update the commithash value if available
+ */
+ ret = dict_get_uint32(xattr, conf->commithash_xattr_name,
+ &vol_commit_hash);
+ if (ret == 0) {
+ conf->vol_commit_hash = vol_commit_hash;
+ }
+ }
- local = frame->local;
- prev = cookie;
- conf = this->private;
- if (!conf)
- goto out;
+ gf_uuid_unparse(local->loc.gfid, gfid);
- gf_uuid_unparse (local->loc.gfid, gfid);
-
- LOCK (&frame->lock);
- {
-
- gf_msg_debug (this->name, 0,
- "revalidate lookup of %s "
- "returned with op_ret %d and op_errno %d",
- local->loc.path, op_ret, op_errno);
-
- if (op_ret == -1) {
- local->op_errno = op_errno;
-
- if ((op_errno != ENOTCONN)
- && (op_errno != ENOENT)
- && (op_errno != ESTALE)) {
- gf_log (this->name, GF_LOG_INFO,
- "Revalidate: subvolume %s for %s "
- "(gfid = %s) returned -1 (%s)",
- prev->this->name, local->loc.path,
- gfid, strerror (op_errno));
- }
- if (op_errno == ESTALE) {
- /* propagate the ESTALE to parent.
- * setting local->return_estale would send
- * ESTALE to parent. */
- local->return_estale = 1;
- }
+ gf_msg_debug(this->name, op_errno,
+ "%s: revalidate lookup on %s returned op_ret %d",
+ local->loc.path, prev->name, op_ret);
+
+ LOCK(&frame->lock);
+ {
+ if (gf_uuid_is_null(local->gfid)) {
+ memcpy(local->gfid, local->loc.gfid, 16);
+ }
- /* if it is ENOENT, we may have to do a
- * 'lookup_everywhere()' to make sure
- * the file is not migrated */
- if (op_errno == ENOENT) {
- if (IA_ISREG (local->loc.inode->ia_type)) {
-
- gf_msg_debug (this->name, 0,
- "found ENOENT for %s. "
- "Setting "
- "need_lookup_everywhere"
- " flag to 1",
- local->loc.path);
-
- local->need_lookup_everywhere = 1;
- }
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+
+ if ((op_errno != ENOTCONN) && (op_errno != ENOENT) &&
+ (op_errno != ESTALE)) {
+ gf_msg(this->name, GF_LOG_INFO, op_errno,
+ DHT_MSG_REVALIDATE_CBK_INFO,
+ "Revalidate: subvolume %s for %s "
+ "(gfid = %s) returned -1",
+ prev->name, local->loc.path, gfid);
+ }
+ if (op_errno == ESTALE) {
+ /* propagate the ESTALE to parent.
+ * setting local->return_estale would send
+ * ESTALE to parent. */
+ local->return_estale = 1;
+ }
+
+ /* if it is ENOENT, we may have to do a
+ * 'lookup_everywhere()' to make sure
+ * the file is not migrated */
+ if (op_errno == ENOENT) {
+ if (IA_ISREG(local->loc.inode->ia_type)) {
+ gf_msg_debug(this->name, 0,
+ "found ENOENT for %s. "
+ "Setting "
+ "need_lookup_everywhere"
+ " flag to 1",
+ local->loc.path);
+
+ local->need_lookup_everywhere = 1;
+ } else if (IA_ISDIR(local->loc.inode->ia_type)) {
+ layout = local->layout;
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].xlator == prev) {
+ layout->list[i].err = op_errno;
+ break;
}
- goto unlock;
+ }
+
+ local->need_selfheal = 1;
}
+ }
- if (stbuf->ia_type != local->inode->ia_type) {
- gf_msg (this->name, GF_LOG_WARNING, 0,
- DHT_MSG_FILE_TYPE_MISMATCH,
- "mismatching filetypes 0%o v/s 0%o for %s,"
- " gfid = %s",
- (stbuf->ia_type), (local->inode->ia_type),
- local->loc.path, gfid);
+ /* The GFID is missing on this subvol. Lookup everywhere to force a
+ * gfid heal
+ */
+ if ((op_errno == ENODATA) &&
+ (IA_ISDIR(local->loc.inode->ia_type))) {
+ local->need_lookup_everywhere = 1;
+ }
- local->op_ret = -1;
- local->op_errno = EINVAL;
+ goto unlock;
+ }
- goto unlock;
- }
+ if ((!IA_ISINVAL(local->inode->ia_type)) &&
+ stbuf->ia_type != local->inode->ia_type) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_FILE_TYPE_MISMATCH,
+ "mismatching filetypes 0%o v/s 0%o for %s,"
+ " gfid = %s",
+ (stbuf->ia_type), (local->inode->ia_type), local->loc.path,
+ gfid);
- layout = local->layout;
+ local->op_ret = -1;
+ local->op_errno = EINVAL;
- is_dir = check_is_dir (inode, stbuf, xattr);
- is_linkfile = check_is_linkfile (inode, stbuf, xattr,
- conf->link_xattr_name);
+ goto unlock;
+ }
- if (is_linkfile) {
- gf_log (this->name, GF_LOG_INFO,
- "Revalidate: linkfile found %s, (gfid = %s)",
- local->loc.path, gfid);
- local->return_estale = 1;
+ layout = local->layout;
- goto unlock;
- }
+ is_dir = check_is_dir(inode, stbuf, xattr);
+ is_linkfile = check_is_linkfile(inode, stbuf, xattr,
+ conf->link_xattr_name);
+ if (is_linkfile) {
+ follow_link = 1;
+ goto unlock;
+ }
+ if (is_dir) {
+ ret = dht_dir_has_layout(xattr, conf->xattr_name);
+ if (ret >= 0) {
+ if (is_greater_time(local->prebuf.ia_ctime,
+ local->prebuf.ia_ctime_nsec,
+ stbuf->ia_ctime, stbuf->ia_ctime_nsec)) {
+ /* Choose source */
+ local->prebuf.ia_gid = stbuf->ia_gid;
+ local->prebuf.ia_uid = stbuf->ia_uid;
+
+ local->prebuf.ia_ctime = stbuf->ia_ctime;
+ local->prebuf.ia_ctime_nsec = stbuf->ia_ctime_nsec;
+
+ if (__is_root_gfid(stbuf->ia_gfid))
+ local->prebuf.ia_prot = stbuf->ia_prot;
+ }
+ }
+
+ if (local->stbuf.ia_type != IA_INVAL) {
+ if ((local->stbuf.ia_gid != stbuf->ia_gid) ||
+ (local->stbuf.ia_uid != stbuf->ia_uid) ||
+ is_permission_different(&local->stbuf.ia_prot,
+ &stbuf->ia_prot)) {
+ local->need_attrheal = 1;
+ }
+ }
+
+ if (!dict_get(xattr, conf->mds_xattr_key)) {
+ gf_msg_debug(this->name, 0,
+ "%s: internal xattr %s is not present"
+ " on subvol %s(gfid is %s)",
+ local->loc.path, conf->mds_xattr_key, prev->name,
+ gfid);
+ } else {
+ check_mds = dht_dict_get_array(xattr, conf->mds_xattr_key,
+ mds_xattr_val, 1, &errst);
+ local->mds_subvol = prev;
+ local->mds_stbuf.ia_gid = stbuf->ia_gid;
+ local->mds_stbuf.ia_uid = stbuf->ia_uid;
+ local->mds_stbuf.ia_prot = stbuf->ia_prot;
+
+ /* save mds subvol on inode ctx */
+ ret = dht_inode_ctx_mdsvol_set(local->inode, this, prev);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_SET_INODE_CTX_FAILED,
+ "Failed to set MDS subvol for %s vol is %s",
+ local->loc.path, prev->name);
+ }
+ if ((check_mds < 0) && !errst) {
+ /* Check if xattrs need to be healed on the directory
+ */
+ local->mds_xattr = dict_ref(xattr);
+ gf_msg_debug(this->name, 0,
+ "Value of %s is not zero on "
+ "hashed subvol so xattr needs to"
+ " be healed on non hashed"
+ " path is %s and vol name is %s "
+ " gfid is %s",
+ conf->mds_xattr_key, local->loc.path,
+ prev->name, gfid);
+ local->need_xattr_heal = 1;
+ }
+ }
+ ret = dht_layout_dir_mismatch(this, layout, prev, &local->loc,
+ xattr);
+ if (ret != 0) {
+ /* In memory layout does not match on-disk layout.
+ */
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LAYOUT_MISMATCH,
+ "Mismatching layouts for %s, gfid = %s", local->loc.path,
+ gfid);
- if (is_dir) {
- ret = dht_dir_has_layout (xattr, conf->xattr_name);
- if (ret >= 0) {
- if (is_greater_time(local->stbuf.ia_ctime,
- local->stbuf.ia_ctime_nsec,
- stbuf->ia_ctime,
- stbuf->ia_ctime_nsec)) {
- local->prebuf.ia_gid = stbuf->ia_gid;
- local->prebuf.ia_uid = stbuf->ia_uid;
- }
- }
- if (local->stbuf.ia_type != IA_INVAL)
- {
- if ((local->stbuf.ia_gid != stbuf->ia_gid) ||
- (local->stbuf.ia_uid != stbuf->ia_uid)) {
- local->need_selfheal = 1;
- }
- }
- ret = dht_layout_dir_mismatch (this, layout,
- prev->this, &local->loc,
- xattr);
- if (ret != 0) {
- gf_msg (this->name, GF_LOG_INFO, 0,
- DHT_MSG_LAYOUT_MISMATCH,
- "Mismatching layouts for %s, gfid = %s",
- local->loc.path, gfid);
-
- local->layout_mismatch = 1;
-
- goto unlock;
- }
- }
+ local->layout_mismatch = 1;
- dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
- dht_iatt_merge (this, &local->postparent, postparent,
- prev->this);
+ goto unlock;
+ }
+ }
- local->op_ret = 0;
+ gf_uuid_copy(local->stbuf.ia_gfid, stbuf->ia_gfid);
+ dht_iatt_merge(this, &local->stbuf, stbuf);
+ dht_iatt_merge(this, &local->postparent, postparent);
- if (!local->xattr) {
- local->xattr = dict_ref (xattr);
- } else if (is_dir) {
- dht_aggregate_xattr (local->xattr, xattr);
- }
+ local->op_ret = 0;
+
+ if (!local->xattr) {
+ local->xattr = dict_ref(xattr);
+ } else if (is_dir) {
+ dht_aggregate_xattr(local->xattr, xattr);
}
+ }
unlock:
- UNLOCK (&frame->lock);
-out:
- this_call_cnt = dht_frame_return (frame);
-
- if (is_last_call (this_call_cnt)) {
- if (!IA_ISDIR (local->stbuf.ia_type)
- && (local->hashed_subvol != local->cached_subvol)
- && (local->stbuf.ia_nlink == 1)
- && (conf && conf->unhashed_sticky_bit)) {
- local->stbuf.ia_prot.sticky = 1;
- }
- if (local->need_selfheal) {
- local->need_selfheal = 0;
- gf_uuid_copy (local->gfid, local->stbuf.ia_gfid);
- local->stbuf.ia_gid = local->prebuf.ia_gid;
- local->stbuf.ia_uid = local->prebuf.ia_uid;
- copy = create_frame (this, this->ctx->pool);
- if (copy) {
- copy_local = dht_local_init (copy, &local->loc,
- NULL, 0);
- if (!copy_local)
- goto cont;
- copy_local->stbuf = local->stbuf;
- copy->local = copy_local;
- FRAME_SU_DO (copy, dht_local_t);
- ret = synctask_new (this->ctx->env,
- dht_dir_attr_heal,
- dht_dir_attr_heal_done,
- copy, copy);
- }
- }
-cont:
- if (local->layout_mismatch) {
- /* Found layout mismatch in the directory, need to
- fix this in the inode context */
- dht_layout_unref (this, local->layout);
- local->layout = NULL;
- dht_lookup_directory (frame, this, &local->loc);
- return 0;
- }
+ UNLOCK(&frame->lock);
- if (local->need_lookup_everywhere) {
- /* As the current layout gave ENOENT error, we would
- need a new layout */
- dht_layout_unref (this, local->layout);
- local->layout = NULL;
-
- /* We know that current cached subvol is no more
- valid, get the new one */
- local->cached_subvol = NULL;
- dht_lookup_everywhere (frame, this, &local->loc);
- return 0;
- }
- if (local->return_estale) {
- local->op_ret = -1;
- local->op_errno = ESTALE;
- }
-
- if (local->loc.parent) {
- dht_inode_ctx_time_update (local->loc.parent, this,
- &local->postparent, 1);
- }
+ if (follow_link) {
+ /* Found a linkto file. Follow it to see if the target file exists
+ */
+ gf_uuid_copy(local->gfid, stbuf->ia_gfid);
- DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
- DHT_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
- local->inode, &local->stbuf, local->xattr,
- &local->postparent);
+ subvol = dht_linkfile_subvol(this, inode, stbuf, xattr);
+ if (!subvol) {
+ op_errno = ESTALE;
+ local->op_ret = -1;
+ } else {
+ STACK_WIND_COOKIE(frame, dht_lookup_linkfile_cbk, subvol, subvol,
+ subvol->fops->lookup, &local->loc,
+ local->xattr_req);
+ return 0;
}
+ }
-err:
- return ret;
-}
+ this_call_cnt = dht_frame_return(frame);
+ if (is_last_call(this_call_cnt)) {
+ if (!IA_ISDIR(local->stbuf.ia_type) &&
+ (local->hashed_subvol != local->cached_subvol) &&
+ (local->stbuf.ia_nlink == 1) &&
+ (conf && conf->unhashed_sticky_bit)) {
+ local->stbuf.ia_prot.sticky = 1;
+ }
+ /* No need to call heal code if volume count is 1
+ */
+ if (conf->subvolume_cnt == 1)
+ local->need_xattr_heal = 0;
+
+ if (IA_ISDIR(local->stbuf.ia_type)) {
+ /* No mds xattr found. Trigger a heal to set it */
+ if (!__is_root_gfid(local->loc.inode->gfid) &&
+ (!dict_get(local->xattr, conf->mds_xattr_key)))
+ local->need_selfheal = 1;
+
+ if (dht_needs_selfheal(frame, this)) {
+ if (!__is_root_gfid(local->loc.inode->gfid)) {
+ if (local->mds_subvol) {
+ local->stbuf.ia_gid = local->mds_stbuf.ia_gid;
+ local->stbuf.ia_uid = local->mds_stbuf.ia_uid;
+ local->stbuf.ia_prot = local->mds_stbuf.ia_prot;
+ }
+ } else {
+ local->stbuf.ia_gid = local->prebuf.ia_gid;
+ local->stbuf.ia_uid = local->prebuf.ia_uid;
+ local->stbuf.ia_prot = local->prebuf.ia_prot;
+ }
-int
-dht_lookup_linkfile_create_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
-{
- dht_local_t *local = NULL;
- xlator_t *cached_subvol = NULL;
- dht_conf_t *conf = NULL;
- int ret = -1;
- char gfid[GF_UUID_BUF_SIZE] = {0};
-
- GF_VALIDATE_OR_GOTO ("dht", frame, out);
- GF_VALIDATE_OR_GOTO ("dht", this, out);
- GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
- GF_VALIDATE_OR_GOTO ("dht", this->private, out);
- GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+ layout = local->layout;
+ dht_selfheal_directory(frame, dht_lookup_selfheal_cbk,
+ &local->loc, layout);
+ return 0;
+ }
+ }
- local = frame->local;
- cached_subvol = local->cached_subvol;
- conf = this->private;
+ if (local->layout_mismatch) {
+ /* Found layout mismatch in the directory, need to
+ fix this in the inode context */
+ dht_layout_unref(this, local->layout);
+ local->layout = NULL;
+ dht_lookup_directory(frame, this, &local->loc);
+ return 0;
+ }
- gf_uuid_unparse(local->loc.gfid, gfid);
+ if (local->need_lookup_everywhere) {
+ /* As the current layout gave ENOENT error, we would
+ need a new layout */
+ dht_layout_unref(this, local->layout);
+ local->layout = NULL;
- ret = dht_layout_preset (this, local->cached_subvol, local->loc.inode);
- if (ret < 0) {
- gf_msg_debug (this->name, 0,
- "Failed to set layout for subvolume %s, "
- "(gfid = %s)",
- cached_subvol ? cached_subvol->name : "<nil>",
- gfid);
- local->op_ret = -1;
- local->op_errno = EINVAL;
- goto unwind;
- }
+ /* We know that current cached subvol is no longer
+ valid, get the new one */
+ local->cached_subvol = NULL;
+ if (local->xattr_req) {
+ if (!gf_uuid_is_null(local->gfid)) {
+ ret = dict_set_static_bin(local->xattr_req, "gfid-req",
+ local->gfid, 16);
+ }
+ }
- local->op_ret = 0;
- if ((local->stbuf.ia_nlink == 1)
- && (conf && conf->unhashed_sticky_bit)) {
- local->stbuf.ia_prot.sticky = 1;
+ dht_lookup_everywhere(frame, this, &local->loc);
+ return 0;
+ }
+ if (local->return_estale) {
+ local->op_ret = -1;
+ local->op_errno = ESTALE;
}
if (local->loc.parent) {
- dht_inode_ctx_time_update (local->loc.parent, this,
- postparent, 1);
+ dht_inode_ctx_time_update(local->loc.parent, this,
+ &local->postparent, 1);
}
+ DHT_STRIP_PHASE1_FLAGS(&local->stbuf);
+ dht_set_fixed_dir_stat(&local->postparent);
+
+ /* local->stbuf is updated only from subvols which have a layout
+ * The reason is to avoid choosing attr heal source from newly
+ * added bricks. In case e.g we have only one subvol and for
+ * some reason layout is not present on it, then local->stbuf
+ * will be EINVAL. This is an indication that the subvols
+ * active in the cluster do not have layouts on disk.
+ * Unwind with ESTALE to trigger a fresh lookup */
+ if (is_dir && local->stbuf.ia_type == IA_INVAL) {
+ local->op_ret = -1;
+ local->op_errno = ESTALE;
+ }
+ /* Delete mds xattr at the time of STACK UNWIND */
+ if (local->xattr)
+ GF_REMOVE_INTERNAL_XATTR(conf->mds_xattr_key, local->xattr);
+
+ DHT_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno,
+ local->inode, &local->stbuf, local->xattr,
+ &local->postparent);
+ }
+
+err:
+ return ret;
+}
+
+static int
+dht_lookup_linkfile_create_cbk(call_frame_t *frame, void *cooie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ xlator_t *cached_subvol = NULL;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ GF_VALIDATE_OR_GOTO("dht", frame, out);
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO("dht", this->private, out);
+
+ local = frame->local;
+ cached_subvol = local->cached_subvol;
+ conf = this->private;
+
+ gf_uuid_unparse(local->loc.gfid, gfid);
+
+ if (local->locked)
+ dht_unlock_namespace(frame, &local->lock[0]);
+
+ ret = dht_layout_preset(this, local->cached_subvol, local->loc.inode);
+ if (ret < 0) {
+ gf_msg_debug(this->name, EINVAL,
+ "Failed to set layout for subvolume %s, "
+ "(gfid = %s)",
+ cached_subvol ? cached_subvol->name : "<nil>", gfid);
+ local->op_ret = -1;
+ local->op_errno = EINVAL;
+ goto unwind;
+ }
+
+ local->op_ret = 0;
+ if ((local->stbuf.ia_nlink == 1) && (conf && conf->unhashed_sticky_bit)) {
+ local->stbuf.ia_prot.sticky = 1;
+ }
+
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update(local->loc.parent, this, postparent, 1);
+ }
+
unwind:
- gf_msg_debug (this->name, 0,
- "creation of linkto on hashed subvol:%s, "
- "returned with op_ret %d and op_errno %d: %s",
- local->hashed_subvol->name,
- op_ret, op_errno, uuid_utoa (local->loc.gfid));
+ gf_msg_debug(this->name, 0,
+ "creation of linkto on hashed subvol:%s, "
+ "returned with op_ret %d and op_errno %d: %s",
+ local->hashed_subvol->name, op_ret, op_errno,
+ uuid_utoa(local->loc.gfid));
- if (local->linked == _gf_true)
- dht_linkfile_attr_heal (frame, this);
+ if (local->linked == _gf_true)
+ dht_linkfile_attr_heal(frame, this);
+ dht_set_fixed_dir_stat(&local->postparent);
- DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
- DHT_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
- local->inode, &local->stbuf, local->xattr,
- &local->postparent);
+ DHT_STRIP_PHASE1_FLAGS(&local->stbuf);
+ DHT_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno,
+ local->inode, &local->stbuf, local->xattr,
+ &local->postparent);
out:
- return ret;
+ return ret;
}
-int
-dht_lookup_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
+static int
+dht_lookup_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- int this_call_cnt = 0;
- dht_local_t *local = NULL;
- const char *path = NULL;
+ int this_call_cnt = 0;
+ dht_local_t *local = NULL;
+ const char *path = NULL;
- local = (dht_local_t*)frame->local;
- path = local->loc.path;
+ local = (dht_local_t *)frame->local;
+ path = local->loc.path;
+ FRAME_SU_UNDO(frame, dht_local_t);
- gf_log (this->name, GF_LOG_INFO, "lookup_unlink returned with "
- "op_ret -> %d and op-errno -> %d for %s", op_ret, op_errno,
- ((path == NULL)? "null" : path ));
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_UNLINK_LOOKUP_INFO,
+ "lookup_unlink returned with "
+ "op_ret -> %d and op-errno -> %d for %s",
+ op_ret, op_errno, ((path == NULL) ? "null" : path));
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- dht_lookup_everywhere_done (frame, this);
- }
+ this_call_cnt = dht_frame_return(frame);
+ if (is_last_call(this_call_cnt)) {
+ dht_lookup_everywhere_done(frame, this);
+ }
- return 0;
+ return 0;
}
-int
-dht_lookup_unlink_of_false_linkto_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int op_ret, int op_errno,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+static int
+dht_lookup_unlink_of_false_linkto_cbk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- int this_call_cnt = 0;
- dht_local_t *local = NULL;
- const char *path = NULL;
+ int this_call_cnt = 0;
+ dht_local_t *local = NULL;
+ const char *path = NULL;
- local = (dht_local_t*)frame->local;
- path = local->loc.path;
+ local = (dht_local_t *)frame->local;
+ path = local->loc.path;
- gf_log (this->name, GF_LOG_INFO, "lookup_unlink returned with "
- "op_ret -> %d and op-errno -> %d for %s", op_ret, op_errno,
- ((path == NULL)? "null" : path ));
+ FRAME_SU_UNDO(frame, dht_local_t);
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_UNLINK_LOOKUP_INFO,
+ "lookup_unlink returned with "
+ "op_ret -> %d and op-errno -> %d for %s",
+ op_ret, op_errno, ((path == NULL) ? "null" : path));
- if (op_ret == 0) {
- dht_lookup_everywhere_done (frame, this);
- } else {
- /*When dht_lookup_everywhere is performed, one cached
- *and one hashed file was found and hashed file does
- *not point to the above mentioned cached node. So it
- *was considered as stale and an unlink was performed.
- *But unlink fails. So may be rebalance is in progress.
- *now ideally we have two data-files. One obtained during
- *lookup_everywhere and one where unlink-failed. So
- *at this point in time we cannot decide which one to
- *choose because there are chances of first cached
- *file is truncated after rebalance and if it is chosen
- *as cached node, application will fail. So return EIO.*/
-
- if (op_errno == EBUSY) {
-
- gf_log (this->name, GF_LOG_ERROR,
- "Could not unlink the linkto file as "
- "either fd is open and/or linkto xattr "
- "is set for %s",
- ((path == NULL)? "null":path));
+ this_call_cnt = dht_frame_return(frame);
+ if (is_last_call(this_call_cnt)) {
+ if ((op_ret == 0) || ((op_errno != EBUSY) && (op_errno != ENOTCONN))) {
+ dht_lookup_everywhere_done(frame, this);
+ } else {
+ /*When dht_lookup_everywhere is performed, one cached
+ *and one hashed file was found and hashed file does
+ *not point to the above mentioned cached node. So it
+ *was considered as stale and an unlink was performed.
+ *But unlink fails. So may be rebalance is in progress.
+ *now ideally we have two data-files. One obtained during
+ *lookup_everywhere and one where unlink-failed. So
+ *at this point in time we cannot decide which one to
+ *choose because there are chances of first cached
+ *file is truncated after rebalance and if it is chosen
+ *as cached node, application will fail. So return EIO.*/
+
+ if (op_errno == EBUSY) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_UNLINK_FAILED,
+ "Could not unlink the linkto file as "
+ "either fd is open and/or linkto xattr "
+ "is set for %s",
+ ((path == NULL) ? "null" : path));
+ }
+ DHT_STACK_UNWIND(lookup, frame, -1, EIO, NULL, NULL, NULL, NULL);
+ }
+ }
+
+ return 0;
+}
- }
- DHT_STACK_UNWIND (lookup, frame, -1, EIO, NULL, NULL,
- NULL, NULL);
+static int
+dht_lookup_unlink_stale_linkto_cbk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ const char *path = NULL;
- }
- }
+ /* NOTE:
+ * If stale file unlink fails either there is an open-fd or is not an
+ * dht-linkto-file then posix_unlink returns EBUSY, which is overwritten
+ * to ENOENT
+ */
- return 0;
+ local = frame->local;
+
+ if (local) {
+ FRAME_SU_UNDO(frame, dht_local_t);
+ if (local->loc.path)
+ path = local->loc.path;
+ }
+
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_UNLINK_LOOKUP_INFO,
+ "Returned with op_ret %d and "
+ "op_errno %d for %s",
+ op_ret, op_errno, ((path == NULL) ? "null" : path));
+
+ DHT_STACK_UNWIND(lookup, frame, -1, ENOENT, NULL, NULL, NULL, NULL);
+
+ return 0;
}
-int
-dht_lookup_unlink_stale_linkto_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int op_ret, int op_errno,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+static int
+dht_fill_dict_to_avoid_unlink_of_migrating_file(dict_t *dict)
{
+ int ret = 0;
- dht_local_t *local = NULL;
- const char *path = NULL;
-
- /* NOTE:
- * If stale file unlink fails either there is an open-fd or is not an
- * dht-linkto-file then posix_unlink returns EBUSY, which is overwritten
- * to ENOENT
- */
+ ret = dict_set_int32_sizen(dict, DHT_SKIP_NON_LINKTO_UNLINK, 1);
- local = frame->local;
+ if (ret)
+ return -1;
- if (local && local->loc.path)
- path = local->loc.path;
+ ret = dict_set_int32_sizen(dict, DHT_SKIP_OPEN_FD_UNLINK, 1);
- gf_log (this->name, GF_LOG_INFO, "Returned with op_ret %d and "
- "op_errno %d for %s", op_ret, op_errno,
- ((path==NULL)?"null":path));
+ if (ret)
+ return -1;
- DHT_STACK_UNWIND (lookup, frame, -1, ENOENT, NULL, NULL, NULL,
- NULL);
+ return 0;
+}
- return 0;
+static int32_t
+dht_linkfile_create_lookup_cbk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent)
+{
+ dht_local_t *local = NULL;
+ int call_cnt = 0, ret = 0;
+ xlator_t *subvol = NULL;
+ uuid_t gfid = {
+ 0,
+ };
+ char gfid_str[GF_UUID_BUF_SIZE] = {0};
+
+ subvol = cookie;
+ local = frame->local;
+
+ if (subvol == local->hashed_subvol) {
+ if ((op_ret == 0) || (op_errno != ENOENT))
+ local->dont_create_linkto = _gf_true;
+ } else {
+ if (gf_uuid_is_null(local->gfid))
+ gf_uuid_copy(gfid, local->loc.gfid);
+ else
+ gf_uuid_copy(gfid, local->gfid);
+
+ if ((op_ret == 0) && gf_uuid_compare(gfid, buf->ia_gfid)) {
+ gf_uuid_unparse(gfid, gfid_str);
+ gf_msg_debug(this->name, 0,
+ "gfid (%s) different on cached subvol "
+ "(%s) and looked up inode (%s), not "
+ "creating linkto",
+ uuid_utoa(buf->ia_gfid), subvol->name, gfid_str);
+ local->dont_create_linkto = _gf_true;
+ } else if (op_ret == -1) {
+ local->dont_create_linkto = _gf_true;
+ }
+ }
+
+ call_cnt = dht_frame_return(frame);
+ if (is_last_call(call_cnt)) {
+ if (local->dont_create_linkto)
+ goto no_linkto;
+ else {
+ gf_msg_debug(this->name, 0,
+ "Creating linkto file on %s(hash) to "
+ "%s on %s (gfid = %s)",
+ local->hashed_subvol->name, local->loc.path,
+ local->cached_subvol->name, gfid_str);
+
+ ret = dht_linkfile_create(frame, dht_lookup_linkfile_create_cbk,
+ this, local->cached_subvol,
+ local->hashed_subvol, &local->loc);
+
+ if (ret < 0)
+ goto no_linkto;
+ }
+ }
+
+ return 0;
+
+no_linkto:
+ gf_msg_debug(this->name, 0,
+ "skipped linkto creation (path:%s) (gfid:%s) "
+ "(hashed-subvol:%s) (cached-subvol:%s)",
+ local->loc.path, gfid_str, local->hashed_subvol->name,
+ local->cached_subvol->name);
+
+ dht_lookup_linkfile_create_cbk(frame, NULL, this, 0, 0, local->loc.inode,
+ &local->stbuf, &local->preparent,
+ &local->postparent, local->xattr);
+ return 0;
}
-int
-dht_fill_dict_to_avoid_unlink_of_migrating_file (dict_t *dict) {
+static int32_t
+dht_call_lookup_linkfile_create(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+ int i = 0;
+ xlator_t *subvol = NULL;
- int ret = 0;
+ local = frame->local;
+ if (gf_uuid_is_null(local->gfid))
+ gf_uuid_unparse(local->loc.gfid, gfid);
+ else
+ gf_uuid_unparse(local->gfid, gfid);
- ret = dict_set_int32 (dict, DHT_SKIP_NON_LINKTO_UNLINK, 1);
+ if (op_ret < 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "protecting namespace failed, skipping linkto "
+ "creation (path:%s)(gfid:%s)(hashed-subvol:%s)"
+ "(cached-subvol:%s)",
+ local->loc.path, gfid, local->hashed_subvol->name,
+ local->cached_subvol->name);
+ goto err;
+ }
- if (ret)
- goto err;
+ local->locked = _gf_true;
- ret = dict_set_int32 (dict, DHT_SKIP_OPEN_FD_UNLINK, 1);
+ local->call_cnt = 2;
- if (ret)
- goto err;
+ for (i = 0; i < 2; i++) {
+ subvol = (subvol == NULL) ? local->hashed_subvol : local->cached_subvol;
+ STACK_WIND_COOKIE(frame, dht_linkfile_create_lookup_cbk, subvol, subvol,
+ subvol->fops->lookup, &local->loc, NULL);
+ }
- return 0;
+ return 0;
err:
- return -1;
-
+ dht_lookup_linkfile_create_cbk(frame, NULL, this, 0, 0, local->loc.inode,
+ &local->stbuf, &local->preparent,
+ &local->postparent, local->xattr);
+ return 0;
}
+
/* Rebalance is performed from cached_node to hashed_node. Initial cached_node
* contains a non-linkto file. After migration it is converted to linkto and
* then unlinked. And at hashed_subvolume, first a linkto file is present,
@@ -1103,5417 +2335,9057 @@ err:
* dht_lookup_everywhere_done takes decision based on any of the above case
*/
-int
-dht_lookup_everywhere_done (call_frame_t *frame, xlator_t *this)
+static int
+dht_lookup_everywhere_done(call_frame_t *frame, xlator_t *this)
{
- int ret = 0;
- dht_local_t *local = NULL;
- xlator_t *hashed_subvol = NULL;
- xlator_t *cached_subvol = NULL;
- dht_layout_t *layout = NULL;
- char gfid[GF_UUID_BUF_SIZE] = {0};
- gf_boolean_t found_non_linkto_on_hashed = _gf_false;
+ int ret = 0;
+ dht_local_t *local = NULL;
+ xlator_t *hashed_subvol = NULL;
+ xlator_t *cached_subvol = NULL;
+ dht_layout_t *layout = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+ gf_boolean_t found_non_linkto_on_hashed = _gf_false;
+
+ local = frame->local;
+ hashed_subvol = local->hashed_subvol;
+ cached_subvol = local->cached_subvol;
+
+ gf_uuid_unparse(local->loc.gfid, gfid);
+
+ if (local->file_count && local->dir_count) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_FILE_TYPE_MISMATCH,
+ "path %s (gfid = %s)exists as a file on one "
+ "subvolume and directory on another. "
+ "Please fix it manually",
+ local->loc.path, gfid);
+ DHT_STACK_UNWIND(lookup, frame, -1, EIO, NULL, NULL, NULL, NULL);
+ return 0;
+ }
+ if (local->op_ret && local->gfid_missing) {
+ if (gf_uuid_is_null(local->gfid_req)) {
+ DHT_STACK_UNWIND(lookup, frame, -1, ENODATA, NULL, NULL, NULL,
+ NULL);
+ return 0;
+ }
+ /* A hack */
+ dht_lookup_directory(frame, this, &local->loc);
+ return 0;
+ }
- local = frame->local;
- hashed_subvol = local->hashed_subvol;
- cached_subvol = local->cached_subvol;
+ if (local->dir_count) {
+ dht_lookup_directory(frame, this, &local->loc);
+ return 0;
+ }
+
+ gf_msg_debug(this->name, 0,
+ "STATUS: hashed_subvol %s "
+ "cached_subvol %s",
+ (hashed_subvol == NULL) ? "null" : hashed_subvol->name,
+ (cached_subvol == NULL) ? "null" : cached_subvol->name);
+
+ if (!cached_subvol) {
+ if (local->skip_unlink.handle_valid_link && hashed_subvol) {
+ /*Purpose of "DHT_SKIP_NON_LINKTO_UNLINK":
+ * If this lookup is performed by rebalance and this
+ * rebalance process detected hashed file and by
+ * the time it sends the lookup request to cached node,
+ * file got migrated and now at initial hashed_node,
+ * final migrated file is present. With current logic,
+ * because this process fails to find the cached_node,
+ * it will unlink the file at initial hashed_node.
+ *
+ * So we avoid this by setting key, and checking at the
+ * posix_unlink that unlink the file only if file is a
+ * linkto file and not a migrated_file.
+ */
+
+ ret = dht_fill_dict_to_avoid_unlink_of_migrating_file(
+ local->xattr_req);
+
+ if (ret) {
+ /* If for some reason, setting key in the dict
+ * fails, return with ENOENT, as with respect to
+ * this process, it detected only a stale link
+ * file.
+ *
+ * Next lookup will delete it.
+ *
+ * Performing deletion of stale link file when
+ * setting key in dict fails, may cause the data
+ * loss because of the above mentioned race.
+ */
- gf_uuid_unparse (local->loc.gfid, gfid);
-
- if (local->file_count && local->dir_count) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_FILE_TYPE_MISMATCH,
- "path %s (gfid = %s)exists as a file on one "
- "subvolume and directory on another. "
- "Please fix it manually",
- local->loc.path, gfid);
- DHT_STACK_UNWIND (lookup, frame, -1, EIO, NULL, NULL, NULL,
- NULL);
- return 0;
- }
+ DHT_STACK_UNWIND(lookup, frame, -1, ENOENT, NULL, NULL, NULL,
+ NULL);
+ } else {
+ local->skip_unlink.handle_valid_link = _gf_false;
+
+ gf_msg_debug(this->name, 0,
+ "No Cached was found and "
+ "unlink on hashed was skipped"
+ " so performing now: %s",
+ local->loc.path);
+ FRAME_SU_DO(frame, dht_local_t);
+ STACK_WIND(frame, dht_lookup_unlink_stale_linkto_cbk,
+ hashed_subvol, hashed_subvol->fops->unlink,
+ &local->loc, 0, local->xattr_req);
+ }
- if (local->dir_count) {
- dht_lookup_directory (frame, this, &local->loc);
- return 0;
+ } else {
+ gf_msg_debug(this->name, 0,
+ "There was no cached file and "
+ "unlink on hashed is not skipped %s",
+ local->loc.path);
+
+ DHT_STACK_UNWIND(lookup, frame, -1, ENOENT, NULL, NULL, NULL, NULL);
}
+ return 0;
+ }
- gf_msg_debug (this->name, 0, "STATUS: hashed_subvol %s "
- "cached_subvol %s",
- (hashed_subvol == NULL)?"null":hashed_subvol->name,
- (cached_subvol == NULL)?"null":cached_subvol->name);
+ /* At the time of dht_lookup, no file was found on hashed and that is
+ * why dht_lookup_everywhere is called, but by the time
+ * dht_lookup_everywhere
+ * reached to server, file might have already migrated. In that case we
+ * will find a migrated file at the hashed_node. In this case store the
+ * layout in context and return successfully.
+ */
- if (!cached_subvol) {
+ if (hashed_subvol || local->need_lookup_everywhere) {
+ if (local->need_lookup_everywhere) {
+ found_non_linkto_on_hashed = _gf_true;
- if (local->skip_unlink.handle_valid_link && hashed_subvol) {
-
- /*Purpose of "DHT_SKIP_NON_LINKTO_UNLINK":
- * If this lookup is performed by rebalance and this
- * rebalance process detected hashed file and by
- * the time it sends the lookup request to cached node,
- * file got migrated and now at initial hashed_node,
- * final migrated file is present. With current logic,
- * because this process fails to find the cached_node,
- * it will unlink the file at initial hashed_node.
- *
- * So we avoid this by setting key, and checking at the
- * posix_unlink that unlink the file only if file is a
- * linkto file and not a migrated_file.
- */
-
-
- ret = dht_fill_dict_to_avoid_unlink_of_migrating_file
- (local->xattr_req);
-
- if (ret) {
- /* If for some reason, setting key in the dict
- * fails, return with ENOENT, as with respect to
- * this process, it detected only a stale link
- * file.
- *
- * Next lookup will delete it.
- *
- * Performing deletion of stale link file when
- * setting key in dict fails, may cause the data
- * loss becase of the above mentioned race.
- */
-
-
- DHT_STACK_UNWIND (lookup, frame, -1, ENOENT,
- NULL, NULL, NULL, NULL);
- } else {
- local->skip_unlink.handle_valid_link = _gf_false;
-
- gf_msg_debug (this->name, 0,
- "No Cached was found and "
- "unlink on hashed was skipped"
- " so performing now: %s",
- local->loc.path);
-
- STACK_WIND (frame,
- dht_lookup_unlink_stale_linkto_cbk,
- hashed_subvol,
- hashed_subvol->fops->unlink,
- &local->loc, 0, local->xattr_req);
- }
+ } else if ((local->file_count == 1) &&
+ (hashed_subvol == cached_subvol)) {
+ gf_msg_debug(this->name, 0,
+ "found cached file on hashed subvolume "
+ "so store in context and return for %s",
+ local->loc.path);
- } else {
+ found_non_linkto_on_hashed = _gf_true;
+ }
- gf_msg_debug (this->name, 0,
- "There was no cached file and "
- "unlink on hashed is not skipped %s",
- local->loc.path);
+ if (found_non_linkto_on_hashed)
+ goto preset_layout;
+ }
- DHT_STACK_UNWIND (lookup, frame, -1, ENOENT, NULL, NULL,
- NULL, NULL);
+ if (hashed_subvol) {
+ if (local->skip_unlink.handle_valid_link == _gf_true) {
+ if (cached_subvol == local->skip_unlink.hash_links_to) {
+ if (gf_uuid_compare(local->skip_unlink.cached_gfid,
+ local->skip_unlink.hashed_gfid)) {
+ /*GFID different, return error*/
+ DHT_STACK_UNWIND(lookup, frame, -1, ESTALE, NULL, NULL,
+ NULL, NULL);
+
+ return 0;
}
- return 0;
- }
- /* At the time of dht_lookup, no file was found on hashed and that is
- * why dht_lookup_everywhere is called, but by the time
- * dht_lookup_everywhere
- * reached to server, file might have already migrated. In that case we
- * will find a migrated file at the hashed_node. In this case store the
- * layout in context and return successfully.
- */
+ ret = dht_layout_preset(this, cached_subvol, local->loc.inode);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_INFO, 0,
+ DHT_MSG_LAYOUT_PRESET_FAILED,
+ "Could not set pre-set layout "
+ "for subvolume %s",
+ cached_subvol->name);
+ }
- if (hashed_subvol || local->need_lookup_everywhere) {
+ local->op_ret = (ret == 0) ? ret : -1;
+ local->op_errno = (ret == 0) ? ret : EINVAL;
- if (local->need_lookup_everywhere) {
+ /* Presence of local->cached_subvol validates
+ * that lookup from cached node is successful
+ */
- found_non_linkto_on_hashed = _gf_true;
+ if (!local->op_ret && local->loc.parent) {
+ dht_inode_ctx_time_update(local->loc.parent, this,
+ &local->postparent, 1);
+ }
+
+ gf_msg_debug(this->name, 0,
+ "Skipped unlinking linkto file "
+ "on the hashed subvolume. "
+ "Returning success as it is a "
+ "valid linkto file. Path:%s",
+ local->loc.path);
+
+ goto unwind_hashed_and_cached;
+ } else {
+ local->skip_unlink.handle_valid_link = _gf_false;
+
+ gf_msg_debug(this->name, 0,
+ "Linkto file found on hashed "
+ "subvol "
+ "and data file found on cached "
+ "subvolume. But linkto points to "
+ "different cached subvolume (%s) "
+ "path %s",
+ (local->skip_unlink.hash_links_to
+ ? local->skip_unlink.hash_links_to->name
+ : " <nil>"),
+ local->loc.path);
+
+ if (local->skip_unlink.opend_fd_count == 0) {
+ ret = dht_fill_dict_to_avoid_unlink_of_migrating_file(
+ local->xattr_req);
+
+ if (ret) {
+ DHT_STACK_UNWIND(lookup, frame, -1, EIO, NULL, NULL,
+ NULL, NULL);
+ } else {
+ local->call_cnt = 1;
+ FRAME_SU_DO(frame, dht_local_t);
+ STACK_WIND(frame, dht_lookup_unlink_of_false_linkto_cbk,
+ hashed_subvol, hashed_subvol->fops->unlink,
+ &local->loc, 0, local->xattr_req);
+ }
+
+ return 0;
+ }
+ }
+ }
+ }
- } else if ((local->file_count == 1) &&
- (hashed_subvol == cached_subvol)) {
+preset_layout:
- gf_msg_debug (this->name, 0,
- "found cached file on hashed subvolume "
- "so store in context and return for %s",
- local->loc.path);
+ if (found_non_linkto_on_hashed) {
+ if (local->need_lookup_everywhere) {
+ if (gf_uuid_compare(local->gfid, local->inode->gfid)) {
+ /* GFID different, return error */
+ DHT_STACK_UNWIND(lookup, frame, -1, ENOENT, NULL, NULL, NULL,
+ NULL);
+ return 0;
+ }
+ }
- found_non_linkto_on_hashed = _gf_true;
- }
+ local->op_ret = 0;
+ local->op_errno = 0;
+ layout = dht_layout_for_subvol(this, cached_subvol);
+ if (!layout) {
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_INFO,
+ "%s: no pre-set layout for subvolume %s,"
+ " gfid = %s",
+ local->loc.path,
+ (cached_subvol ? cached_subvol->name : "<nil>"), gfid);
+ }
- if (found_non_linkto_on_hashed)
- goto preset_layout;
+ ret = dht_layout_set(this, local->inode, layout);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_INFO,
+ "%s: failed to set layout for subvol %s, "
+ "gfid = %s",
+ local->loc.path,
+ (cached_subvol ? cached_subvol->name : "<nil>"), gfid);
+ }
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update(local->loc.parent, this,
+ &local->postparent, 1);
}
+ DHT_STRIP_PHASE1_FLAGS(&local->stbuf);
+ dht_set_fixed_dir_stat(&local->postparent);
+ DHT_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno,
+ local->inode, &local->stbuf, local->xattr,
+ &local->postparent);
+ return 0;
+ }
- if (hashed_subvol) {
- if (local->skip_unlink.handle_valid_link == _gf_true) {
- if (cached_subvol == local->skip_unlink.hash_links_to) {
+ if (!hashed_subvol) {
+ gf_msg_debug(this->name, 0,
+ "Cannot create linkfile for %s on %s: "
+ "hashed subvolume cannot be found, gfid = %s.",
+ local->loc.path, cached_subvol->name, gfid);
- if (gf_uuid_compare (local->skip_unlink.cached_gfid,
- local->skip_unlink.hashed_gfid)){
+ local->op_ret = 0;
+ local->op_errno = 0;
- /*GFID different, return error*/
- DHT_STACK_UNWIND (lookup, frame, -1,
- ESTALE, NULL, NULL,
- NULL, NULL);
+ ret = dht_layout_preset(frame->this, cached_subvol, local->inode);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LAYOUT_PRESET_FAILED,
+ "Failed to set layout for subvol %s"
+ ", gfid = %s",
+ cached_subvol ? cached_subvol->name : "<nil>", gfid);
+ local->op_ret = -1;
+ local->op_errno = EINVAL;
+ }
- return 0;
- }
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update(local->loc.parent, this,
+ &local->postparent, 1);
+ }
- ret = dht_layout_preset (this, cached_subvol,
- local->loc.inode);
- if (ret) {
- gf_log (this->name, GF_LOG_INFO,
- "Could not set pre-set layout "
- "for subvolume %s",
- cached_subvol->name);
- }
+ DHT_STRIP_PHASE1_FLAGS(&local->stbuf);
+ dht_set_fixed_dir_stat(&local->postparent);
+ DHT_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno,
+ local->inode, &local->stbuf, local->xattr,
+ &local->postparent);
+ return 0;
+ }
+
+ if (frame->root->op != GF_FOP_RENAME) {
+ local->current = &local->lock[0];
+ ret = dht_protect_namespace(frame, &local->loc, hashed_subvol,
+ &local->current->ns,
+ dht_call_lookup_linkfile_create);
+ } else {
+ gf_msg_debug(this->name, 0,
+ "Creating linkto file on %s(hash) to %s on %s "
+ "(gfid = %s)",
+ hashed_subvol->name, local->loc.path, cached_subvol->name,
+ gfid);
+
+ ret = dht_linkfile_create(frame, dht_lookup_linkfile_create_cbk, this,
+ cached_subvol, hashed_subvol, &local->loc);
+ }
+
+ return ret;
- local->op_ret = (ret == 0) ? ret : -1;
- local->op_errno = (ret == 0) ? ret : EINVAL;
+unwind_hashed_and_cached:
+ DHT_STRIP_PHASE1_FLAGS(&local->stbuf);
+ dht_set_fixed_dir_stat(&local->postparent);
+ DHT_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno,
+ local->inode, &local->stbuf, local->xattr,
+ &local->postparent);
+ return 0;
+}
- /* Presence of local->cached_subvol validates
- * that lookup from cached node is successful
- */
+static int
+dht_lookup_everywhere_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xattr,
+ struct iatt *postparent)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ xlator_t *prev = NULL;
+ int is_linkfile = 0;
+ int is_dir = 0;
+ loc_t *loc = NULL;
+ xlator_t *link_subvol = NULL;
+ int ret = -1;
+ int32_t fd_count = 0;
+ dht_conf_t *conf = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+ dict_t *dict_req = {0};
+
+ GF_VALIDATE_OR_GOTO("dht", frame, out);
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO("dht", cookie, out);
+ GF_VALIDATE_OR_GOTO("dht", this->private, out);
+
+ local = frame->local;
+ loc = &local->loc;
+ conf = this->private;
+
+ prev = cookie;
+
+ gf_msg_debug(this->name, 0,
+ "returned with op_ret %d and op_errno %d (%s) "
+ "from subvol %s",
+ op_ret, op_errno, loc->path, prev->name);
+
+ LOCK(&frame->lock);
+ {
+ if (op_ret == -1) {
+ if (op_errno != ENOENT)
+ local->op_errno = op_errno;
+ if (op_errno == ENODATA)
+ local->gfid_missing = _gf_true;
+ goto unlock;
+ }
- if (!local->op_ret && local->loc.parent) {
- dht_inode_ctx_time_update
- (local->loc.parent, this,
- &local->postparent, 1);
- }
+ if (gf_uuid_is_null(local->gfid))
+ gf_uuid_copy(local->gfid, buf->ia_gfid);
- gf_msg_debug (this->name, 0,
- "Skipped unlinking linkto file "
- "on the hashed subvolume. "
- "Returning success as it is a "
- "valid linkto file. Path:%s"
- ,local->loc.path);
+ gf_uuid_unparse(local->gfid, gfid);
- goto unwind_hashed_and_cached;
- } else {
+ if (gf_uuid_compare(local->gfid, buf->ia_gfid)) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_GFID_MISMATCH,
+ "%s: gfid differs on subvolume %s,"
+ " gfid local = %s, gfid node = %s",
+ loc->path, prev->name, gfid, uuid_utoa(buf->ia_gfid));
+ }
- local->skip_unlink.handle_valid_link = _gf_false;
+ is_linkfile = check_is_linkfile(inode, buf, xattr,
+ conf->link_xattr_name);
- gf_msg_debug (this->name, 0,
- "Linkto file found on hashed "
- "subvol "
- "and data file found on cached "
- "subvolume. But linkto points to "
- "different cached subvolume (%s) "
- "path %s",
- (local->skip_unlink.hash_links_to ?
- local->skip_unlink.hash_links_to->name :
- " <nil>"), local->loc.path);
+ if (is_linkfile) {
+ link_subvol = dht_linkfile_subvol(this, inode, buf, xattr);
+ gf_msg_debug(this->name, 0, "found on %s linkfile %s (-> %s)",
+ prev->name, loc->path,
+ link_subvol ? link_subvol->name : "''");
+ goto unlock;
+ }
- if (local->skip_unlink.opend_fd_count == 0) {
+ is_dir = check_is_dir(inode, buf, xattr);
+ /* non linkfile GFID takes precedence but don't overwrite
+ gfid if we have already found a cached file*/
+ if (!local->cached_subvol)
+ gf_uuid_copy(local->gfid, buf->ia_gfid);
- ret = dht_fill_dict_to_avoid_unlink_of_migrating_file
- (local->xattr_req);
+ if (is_dir) {
+ local->dir_count++;
+ gf_msg_debug(this->name, 0, "found on %s directory %s", prev->name,
+ loc->path);
+ } else {
+ local->file_count++;
- if (ret) {
- DHT_STACK_UNWIND (lookup, frame, -1,
- EIO, NULL, NULL,
- NULL, NULL);
- } else {
- local->call_cnt = 1;
- STACK_WIND (frame,
- dht_lookup_unlink_of_false_linkto_cbk,
- hashed_subvol,
- hashed_subvol->fops->unlink,
- &local->loc, 0,
- local->xattr_req);
- }
+ gf_msg_debug(this->name, 0, "found cached file on %s for %s",
+ prev->name, loc->path);
- return 0;
+ if (!local->cached_subvol) {
+ /* found one file */
+ dht_iatt_merge(this, &local->stbuf, buf);
- }
- }
+ local->xattr = dict_ref(xattr);
+ local->cached_subvol = prev;
- }
- }
+ gf_msg_debug(this->name, 0,
+ "storing cached on %s file"
+ " %s",
+ prev->name, loc->path);
+ dht_iatt_merge(this, &local->postparent, postparent);
-preset_layout:
+ gf_uuid_copy(local->skip_unlink.cached_gfid, buf->ia_gfid);
+ } else {
+ /* This is where we need 'rename' both entries logic */
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_FILE_ON_MULT_SUBVOL,
+ "multiple subvolumes (%s and %s) have "
+ "file %s (preferably rename the file "
+ "in the backend,and do a fresh lookup)",
+ local->cached_subvol->name, prev->name, local->loc.path);
+ }
+ }
+ }
+unlock:
+ UNLOCK(&frame->lock);
- if (found_non_linkto_on_hashed) {
+ if (is_linkfile) {
+ ret = dict_get_int32(xattr, GLUSTERFS_OPEN_FD_COUNT, &fd_count);
- if (local->need_lookup_everywhere) {
- if (gf_uuid_compare (local->gfid, local->inode->gfid)) {
- /* GFID different, return error */
- DHT_STACK_UNWIND (lookup, frame, -1, ENOENT,
- NULL, NULL, NULL, NULL);
- return 0;
- }
- }
+ /* Any linkto file found on the non-hashed subvolume should
+ * be unlinked (performed in the "else if" block below)
+ *
+ * But if a linkto file is found on hashed subvolume, it may be
+ * pointing to valid cached node. So unlinking of linkto
+ * file on hashed subvolume is skipped and inside
+ * dht_lookup_everywhere_done, checks are performed. If this
+ * linkto file is found as stale linkto file, it is deleted
+ * otherwise unlink is skipped.
+ */
- local->op_ret = 0;
- local->op_errno = 0;
- layout = dht_layout_for_subvol (this, cached_subvol);
- if (!layout) {
- gf_log (this->name, GF_LOG_INFO,
- "%s: no pre-set layout for subvolume %s,"
- " gfid = %s",
- local->loc.path, (cached_subvol ?
- cached_subvol->name :
- "<nil>"), gfid);
- }
+ if (local->hashed_subvol && local->hashed_subvol == prev) {
+ local->skip_unlink.handle_valid_link = _gf_true;
+ local->skip_unlink.opend_fd_count = fd_count;
+ local->skip_unlink.hash_links_to = link_subvol;
+ gf_uuid_copy(local->skip_unlink.hashed_gfid, buf->ia_gfid);
+
+ gf_msg_debug(this->name, 0,
+ "Found"
+ " one linkto file on hashed subvol %s "
+ "for %s: Skipping unlinking till "
+ "everywhere_done",
+ prev->name, loc->path);
+
+ } else if (!ret && (fd_count == 0)) {
+ dict_req = dict_new();
+
+ ret = dht_fill_dict_to_avoid_unlink_of_migrating_file(dict_req);
+
+ if (ret) {
+ /* Skip unlinking for dict_failure
+ *File is found as a linkto file on non-hashed,
+ *subvolume. In the current implementation,
+ *finding a linkto-file on non-hashed does not
+ *always implies that it is stale. So deletion
+ *of file should be done only when both fd is
+ *closed and linkto-xattr is set. In case of
+ *dict_set failure, avoid skipping of file.
+ *NOTE: dht_frame_return should get called for
+ * this block.
+ */
- ret = dht_layout_set (this, local->inode, layout);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "%s: failed to set layout for subvol %s, "
- "gfid = %s",
- local->loc.path, (cached_subvol ?
- cached_subvol->name :
- "<nil>"), gfid);
- }
+ dict_unref(dict_req);
+
+ } else {
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_INFO,
+ "attempting deletion of stale linkfile "
+ "%s on %s (hashed subvol is %s)",
+ loc->path, prev->name,
+ (local->hashed_subvol ? local->hashed_subvol->name
+ : "<null>"));
+ /* *
+ * These stale files may be created using root
+ * user. Hence deletion will work only with
+ * root.
+ */
+ FRAME_SU_DO(frame, dht_local_t);
+ STACK_WIND(frame, dht_lookup_unlink_cbk, prev,
+ prev->fops->unlink, loc, 0, dict_req);
- if (local->loc.parent) {
- dht_inode_ctx_time_update (local->loc.parent, this,
- &local->postparent, 1);
- }
+ dict_unref(dict_req);
- DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
- DHT_STACK_UNWIND (lookup, frame, local->op_ret,
- local->op_errno, local->inode,
- &local->stbuf, local->xattr,
- &local->postparent);
return 0;
+ }
}
+ }
- if (!hashed_subvol) {
+ this_call_cnt = dht_frame_return(frame);
+ if (is_last_call(this_call_cnt)) {
+ dht_lookup_everywhere_done(frame, this);
+ }
- gf_msg_debug (this->name, 0,
- "Cannot create linkfile for %s on %s: "
- "hashed subvolume cannot be found, gfid = %s.",
- local->loc.path, cached_subvol->name, gfid);
+out:
+ return ret;
+}
- local->op_ret = 0;
- local->op_errno = 0;
+int
+dht_lookup_everywhere(call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ int i = 0;
+ int call_cnt = 0;
- ret = dht_layout_preset (frame->this, cached_subvol,
- local->inode);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "Failed to set layout for subvol %s"
- ", gfid = %s",
- cached_subvol ? cached_subvol->name :
- "<nil>", gfid);
- local->op_ret = -1;
- local->op_errno = EINVAL;
- }
+ GF_VALIDATE_OR_GOTO("dht", frame, err);
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO("dht", this->private, out);
+ GF_VALIDATE_OR_GOTO("dht", loc, out);
- if (local->loc.parent) {
- dht_inode_ctx_time_update (local->loc.parent, this,
- &local->postparent, 1);
- }
+ conf = this->private;
+ local = frame->local;
- DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
- DHT_STACK_UNWIND (lookup, frame, local->op_ret,
- local->op_errno, local->inode,
- &local->stbuf, local->xattr,
- &local->postparent);
- return 0;
- }
+ call_cnt = conf->subvolume_cnt;
+ local->call_cnt = call_cnt;
- gf_msg_debug (this->name, 0,
- "Creating linkto file on %s(hash) to %s on %s (gfid = %s)",
- hashed_subvol->name, local->loc.path,
- cached_subvol->name, gfid);
+ if (!local->inode)
+ local->inode = inode_ref(loc->inode);
- ret = dht_linkfile_create (frame,
- dht_lookup_linkfile_create_cbk, this,
- cached_subvol, hashed_subvol, &local->loc);
+ gf_msg_debug(this->name, 0, "winding lookup call to %d subvols", call_cnt);
- return ret;
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND_COOKIE(frame, dht_lookup_everywhere_cbk, conf->subvolumes[i],
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup, loc,
+ local->xattr_req);
+ }
-unwind_hashed_and_cached:
- DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
- DHT_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
- local->loc.inode, &local->stbuf, local->xattr,
- &local->postparent);
- return 0;
+ return 0;
+out:
+ DHT_STACK_UNWIND(lookup, frame, -1, EINVAL, NULL, NULL, NULL, NULL);
+err:
+ return -1;
}
int
-dht_lookup_everywhere_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf, dict_t *xattr,
- struct iatt *postparent)
-{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
- int is_linkfile = 0;
- int is_dir = 0;
- xlator_t *subvol = NULL;
- loc_t *loc = NULL;
- xlator_t *link_subvol = NULL;
- int ret = -1;
- int32_t fd_count = 0;
- dht_conf_t *conf = NULL;
- char gfid[GF_UUID_BUF_SIZE] = {0};
- dict_t *dict_req = {0};
-
- GF_VALIDATE_OR_GOTO ("dht", frame, out);
- GF_VALIDATE_OR_GOTO ("dht", this, out);
- GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
- GF_VALIDATE_OR_GOTO ("dht", cookie, out);
- GF_VALIDATE_OR_GOTO ("dht", this->private, out);
-
- local = frame->local;
- loc = &local->loc;
- conf = this->private;
-
- prev = cookie;
- subvol = prev->this;
-
- gf_msg_debug (this->name, 0,
- "returned with op_ret %d and op_errno %d (%s) "
- "from subvol %s", op_ret, op_errno, loc->path,
- subvol->name);
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- if (op_errno != ENOENT)
- local->op_errno = op_errno;
- goto unlock;
- }
+dht_lookup_linkfile_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent)
+{
+ xlator_t *prev = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ loc_t *loc = NULL;
+ dht_conf_t *conf = NULL;
+ int ret = 0;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ GF_VALIDATE_OR_GOTO("dht", frame, out);
+ GF_VALIDATE_OR_GOTO("dht", this, unwind);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, unwind);
+ GF_VALIDATE_OR_GOTO("dht", this->private, unwind);
+ GF_VALIDATE_OR_GOTO("dht", cookie, unwind);
+
+ prev = cookie;
+ subvol = prev;
+ conf = this->private;
+ local = frame->local;
+ loc = &local->loc;
+
+ gf_uuid_unparse(loc->gfid, gfid);
+
+ if (op_ret == -1) {
+ gf_msg(this->name, GF_LOG_INFO, op_errno, DHT_MSG_LINK_FILE_LOOKUP_INFO,
+ "Lookup of %s on %s (following linkfile) failed "
+ ",gfid = %s",
+ local->loc.path, subvol->name, gfid);
+
+ /* If cached subvol returned ENOTCONN, do not do
+ lookup_everywhere. We need to make sure linkfile does not get
+ removed, which can take away the namespace, and subvol is
+ anyways down. */
+
+ local->cached_subvol = NULL;
+ if (op_errno != ENOTCONN)
+ goto err;
+ else
+ goto unwind;
+ }
+
+ if (check_is_dir(inode, stbuf, xattr)) {
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LINK_FILE_LOOKUP_INFO,
+ "Lookup of %s on %s (following linkfile) reached dir,"
+ " gfid = %s",
+ local->loc.path, subvol->name, gfid);
+ goto err;
+ }
+
+ if (check_is_linkfile(inode, stbuf, xattr, conf->link_xattr_name)) {
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LINK_FILE_LOOKUP_INFO,
+ "lookup of %s on %s (following linkfile) reached link,"
+ "gfid = %s",
+ local->loc.path, subvol->name, gfid);
+ goto err;
+ }
+
+ if (gf_uuid_compare(local->gfid, stbuf->ia_gfid)) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_GFID_MISMATCH,
+ "%s: gfid different on data file on %s,"
+ " gfid local = %s, gfid node = %s ",
+ local->loc.path, subvol->name, gfid, uuid_utoa(stbuf->ia_gfid));
+ goto err;
+ }
+
+ if ((stbuf->ia_nlink == 1) && (conf && conf->unhashed_sticky_bit)) {
+ stbuf->ia_prot.sticky = 1;
+ }
+
+ ret = dht_layout_preset(this, prev, inode);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LAYOUT_PRESET_FAILED,
+ "Failed to set layout for subvolume %s,"
+ "gfid = %s",
+ prev->name, gfid);
+ op_ret = -1;
+ op_errno = EINVAL;
+ }
+
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update(local->loc.parent, this, postparent, 1);
+ }
- if (gf_uuid_is_null (local->gfid))
- gf_uuid_copy (local->gfid, buf->ia_gfid);
+unwind:
+ DHT_STRIP_PHASE1_FLAGS(stbuf);
+ dht_set_fixed_dir_stat(postparent);
+ DHT_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, stbuf, xattr,
+ postparent);
- gf_uuid_unparse(local->gfid, gfid);
+ return 0;
- if (gf_uuid_compare (local->gfid, buf->ia_gfid)) {
- gf_msg (this->name, GF_LOG_WARNING, 0,
- DHT_MSG_GFID_MISMATCH,
- "%s: gfid differs on subvolume %s,"
- " gfid local = %s, gfid node = %s",
- loc->path, prev->this->name, gfid,
- uuid_utoa(buf->ia_gfid));
- }
+err:
+ dht_lookup_everywhere(frame, this, loc);
+out:
+ return 0;
+}
- is_linkfile = check_is_linkfile (inode, buf, xattr,
- conf->link_xattr_name);
-
- if (is_linkfile) {
- link_subvol = dht_linkfile_subvol (this, inode, buf,
- xattr);
- gf_msg_debug (this->name, 0,
- "found on %s linkfile %s (-> %s)",
- subvol->name, loc->path,
- link_subvol ? link_subvol->name : "''");
- goto unlock;
- }
+/* Code to get hashed subvol based on inode and loc
+ First it check if loc->parent and loc->path exist then it get
+ hashed subvol based on loc.
+*/
- is_dir = check_is_dir (inode, buf, xattr);
+static gf_boolean_t
+dht_should_lookup_everywhere(xlator_t *this, dht_conf_t *conf, loc_t *loc)
+{
+ dht_layout_t *parent_layout = NULL;
+ int ret = 0;
+ gf_boolean_t lookup_everywhere = _gf_true;
+
+ /* lookup-optimize supersedes lookup-unhashed settings.
+ * If it is set, do not process search_unhashed
+ * If lookup-optimize if enabled, lookup everywhere if:
+ * - this is the rebalance daemon.
+ * - loc->parent is unavailable.
+ * - parent_layout is unavailable
+ * - parent_layout->commit_hash != conf->vol_commit_hash
+ */
+
+ if (conf->lookup_optimize) {
+ if (!conf->defrag && loc->parent) {
+ ret = dht_inode_ctx_layout_get(loc->parent, this, &parent_layout);
+ if (!ret && parent_layout &&
+ (parent_layout->commit_hash == conf->vol_commit_hash)) {
+ lookup_everywhere = _gf_false;
+ }
+ }
+ goto out;
+ } else {
+ if (conf->search_unhashed == GF_DHT_LOOKUP_UNHASHED_AUTO) {
+ if (loc->parent) {
+ ret = dht_inode_ctx_layout_get(loc->parent, this,
+ &parent_layout);
+ if (ret || !parent_layout ||
+ (!parent_layout->search_unhashed)) {
+ lookup_everywhere = _gf_false;
+ }
+ } else {
+ lookup_everywhere = _gf_false;
+ }
+
+ goto out;
+ }
+ }
+out:
+ return lookup_everywhere;
+}
+
+int
+dht_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, inode_t *inode, struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent)
+{
+ char is_linkfile = 0;
+ char is_dir = 0;
+ xlator_t *subvol = NULL;
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ loc_t *loc = NULL;
+ xlator_t *prev = NULL;
+ int ret = 0;
+ uint32_t vol_commit_hash = 0;
+
+ GF_VALIDATE_OR_GOTO("dht", frame, err);
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO("dht", cookie, out);
+ GF_VALIDATE_OR_GOTO("dht", this->private, out);
+
+ conf = this->private;
+
+ prev = cookie;
+ local = frame->local;
+ loc = &local->loc;
+
+ gf_msg_debug(this->name, op_errno,
+ "%s: fresh_lookup on %s returned with op_ret %d", loc->path,
+ prev->name, op_ret);
+
+ if (op_ret == -1) {
+ if (ENTRY_MISSING(op_ret, op_errno)) {
+ if (1 == conf->subvolume_cnt) {
+ /* No need to lookup again */
+ goto out;
+ }
- /* non linkfile GFID takes precedence but don't overwrite
- gfid if we have already found a cached file*/
- if (!local->cached_subvol)
- gf_uuid_copy (local->gfid, buf->ia_gfid);
+ gf_msg_debug(this->name, 0, "Entry %s missing on subvol %s",
+ loc->path, prev->name);
- if (is_dir) {
- local->dir_count++;
+ if (dht_should_lookup_everywhere(this, conf, loc)) {
+ local->op_errno = ENOENT;
+ dht_lookup_everywhere(frame, this, loc);
+ return 0;
+ }
- gf_msg_debug (this->name, 0,
- "found on %s directory %s",
- subvol->name, loc->path);
- } else {
- local->file_count++;
-
- gf_msg_debug (this->name, 0,
- "found cached file on %s for %s",
- subvol->name, loc->path);
-
- if (!local->cached_subvol) {
- /* found one file */
- dht_iatt_merge (this, &local->stbuf, buf,
- subvol);
- local->xattr = dict_ref (xattr);
- local->cached_subvol = subvol;
-
- gf_msg_debug (this->name, 0,
- "storing cached on %s file"
- " %s", subvol->name, loc->path);
-
- dht_iatt_merge (this, &local->postparent,
- postparent, subvol);
-
- gf_uuid_copy (local->skip_unlink.cached_gfid,
- buf->ia_gfid);
- } else {
- /* This is where we need 'rename' both entries logic */
- gf_msg (this->name, GF_LOG_WARNING, 0,
- DHT_MSG_FILE_ON_MULT_SUBVOL,
- "multiple subvolumes (%s and %s) have "
- "file %s (preferably rename the file "
- "in the backend,and do a fresh lookup)",
- local->cached_subvol->name,
- subvol->name, local->loc.path);
- }
- }
+ } else {
+ /* posix returns ENODATA if the gfid is not set but the client and
+ * server protocol layers do not send the stbuf. We need to
+ * heal this so check if this is a directory on the other subvols.
+ */
+ if ((op_errno == ENOTCONN) || (op_errno == ENODATA)) {
+ dht_lookup_directory(frame, this, &local->loc);
+ return 0;
+ }
+ }
+ gf_msg_debug(this->name, op_errno, "%s: Lookup on subvolume %s failed",
+ loc->path, prev->name);
+ goto out;
+ }
+
+ /* Lookup succeeded - op_ret = 0 */
+
+ /* This is required for handling stale linkfile deletion,
+ * or any more call which happens from this 'loc'.
+ */
+ if (gf_uuid_is_null(local->gfid)) {
+ /*This is set from the first successful response*/
+ memcpy(local->gfid, stbuf->ia_gfid, 16);
+ }
+
+ if (!conf->vch_forced) {
+ /* Update the commit hash in conf if it is found */
+ ret = dict_get_uint32(xattr, conf->commithash_xattr_name,
+ &vol_commit_hash);
+ if (ret == 0) {
+ conf->vol_commit_hash = vol_commit_hash;
}
-unlock:
- UNLOCK (&frame->lock);
+ }
- if (is_linkfile) {
- ret = dict_get_int32 (xattr, GLUSTERFS_OPEN_FD_COUNT, &fd_count);
+ is_dir = check_is_dir(inode, stbuf, xattr);
+ if (is_dir) {
+ /* A directory is present on all subvols, send the lookup to
+ * all subvols now */
+ local->inode = inode_ref(inode);
+ local->xattr = dict_ref(xattr);
+ dht_lookup_directory(frame, this, &local->loc);
+ return 0;
+ }
- /* Any linkto file found on the non-hashed subvolume should
- * be unlinked (performed in the "else if" block below)
- *
- * But if a linkto file is found on hashed subvolume, it may be
- * pointing to vaild cached node. So unlinking of linkto
- * file on hashed subvolume is skipped and inside
- * dht_lookup_everywhere_done, checks are performed. If this
- * linkto file is found as stale linkto file, it is deleted
- * otherwise unlink is skipped.
- */
+ is_linkfile = check_is_linkfile(inode, stbuf, xattr, conf->link_xattr_name);
- if (local->hashed_subvol && local->hashed_subvol == subvol) {
+ if (!is_linkfile) {
+ /* non-directory and not a linkto file. This is a data file
+ * Update the layout to point to the cached subvol
+ */
- local->skip_unlink.handle_valid_link = _gf_true;
- local->skip_unlink.opend_fd_count = fd_count;
- local->skip_unlink.hash_links_to = link_subvol;
- gf_uuid_copy (local->skip_unlink.hashed_gfid,
- buf->ia_gfid);
+ ret = dht_layout_preset(this, prev, inode);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LAYOUT_PRESET_FAILED,
+ "%s: could not set pre-set layout for subvolume %s",
+ loc->path, prev->name);
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+ goto out;
+ }
+
+ /* This is a linkto file. Get the value of the target subvol from the
+ * linkto xattr and lookup there to see if the file exists
+ */
+ subvol = dht_linkfile_subvol(this, inode, stbuf, xattr);
+ if (!subvol) {
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_INFO,
+ "%s: No link subvol for linkto", loc->path);
+ dht_lookup_everywhere(frame, this, loc);
+ return 0;
+ }
- gf_msg_debug (this->name, 0, "Found"
- " one linkto file on hashed subvol %s "
- "for %s: Skipping unlinking till "
- "everywhere_done", subvol->name,
- loc->path);
+ gf_msg_debug(this->name, 0, "%s: Calling lookup on linkto target %s",
+ loc->path, subvol->name);
- } else if (!ret && (fd_count == 0)) {
+ STACK_WIND_COOKIE(frame, dht_lookup_linkfile_cbk, subvol, subvol,
+ subvol->fops->lookup, &local->loc, local->xattr_req);
- dict_req = dict_new ();
+ return 0;
- ret = dht_fill_dict_to_avoid_unlink_of_migrating_file
- (dict_req);
+out:
+ /*
+ * FIXME: postparent->ia_size and postparent->st_blocks do not have
+ * correct values. since, postparent corresponds to a directory these
+ * two members should have values equal to sum of corresponding values
+ * from each of the subvolume. See dht_iatt_merge for reference.
+ */
+
+ if (!op_ret && local && local->loc.parent) {
+ dht_inode_ctx_time_update(local->loc.parent, this, postparent, 1);
+ }
+
+ DHT_STRIP_PHASE1_FLAGS(stbuf);
+ dht_set_fixed_dir_stat(postparent);
+ DHT_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, stbuf, xattr,
+ postparent);
+err:
+ return 0;
+}
- if (ret) {
+/* For directories, check if acl xattrs have been requested (by the acl
+ * xlator), if not, request for them. These xattrs are needed for dht dir
+ * self-heal to perform proper self-healing of dirs
+ */
+static void
+dht_check_and_set_acl_xattr_req(xlator_t *this, dict_t *xattr_req)
+{
+ int ret = 0;
- /* Skip unlinking for dict_failure
- *File is found as a linkto file on non-hashed,
- *subvolume. In the current implementation,
- *finding a linkto-file on non-hashed does not
- *always implies that it is stale. So deletion
- *of file should be done only when both fd is
- *closed and linkto-xattr is set. In case of
- *dict_set failure, avoid skipping of file.
- *NOTE: dht_frame_return should get called for
- * this block.
- */
+ GF_ASSERT(xattr_req);
- dict_unref (dict_req);
+ if (!dict_get(xattr_req, POSIX_ACL_ACCESS_XATTR)) {
+ ret = dict_set_int8(xattr_req, POSIX_ACL_ACCESS_XATTR, 0);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value:key = %s",
+ POSIX_ACL_ACCESS_XATTR);
+ }
- } else {
- gf_log (this->name, GF_LOG_INFO,
- "attempting deletion of stale linkfile "
- "%s on %s (hashed subvol is %s)",
- loc->path, subvol->name,
- (local->hashed_subvol?
- local->hashed_subvol->name : "<null>"));
+ if (!dict_get(xattr_req, POSIX_ACL_DEFAULT_XATTR)) {
+ ret = dict_set_int8(xattr_req, POSIX_ACL_DEFAULT_XATTR, 0);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value:key = %s",
+ POSIX_ACL_DEFAULT_XATTR);
+ }
+
+ return;
+}
- STACK_WIND (frame, dht_lookup_unlink_cbk,
- subvol, subvol->fops->unlink, loc,
- 0, dict_req);
+/* for directories, we need the following info:
+ * the layout : trusted.glusterfs.dht
+ * the mds information : trusted.glusterfs.dht.mds
+ * the acl info: See above
+ */
+static int
+dht_set_dir_xattr_req(xlator_t *this, loc_t *loc, dict_t *xattr_req)
+{
+ int ret = -EINVAL;
+ dht_conf_t *conf = NULL;
+
+ conf = this->private;
+ if (!conf) {
+ goto err;
+ }
+
+ if (!xattr_req) {
+ goto err;
+ }
+
+ /* Xattr to get the layout for a directory
+ */
+ ret = dict_set_uint32(xattr_req, conf->xattr_name, 4 * 4);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value:key = %s for "
+ "path %s",
+ conf->xattr_name, loc->path);
+ goto err;
+ }
+
+ /*Non-fatal failure */
+ ret = dict_set_uint32(xattr_req, conf->mds_xattr_key, 4);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value:key = %s for "
+ "path %s",
+ conf->mds_xattr_key, loc->path);
+ }
+
+ dht_check_and_set_acl_xattr_req(this, xattr_req);
+ ret = 0;
+err:
+ return ret;
+}
- dict_unref (dict_req);
+/* If the hashed subvol is present, send the lookup to only that subvol first.
+ * If no hashed subvol, send a lookup to all subvols and proceed based on the
+ * responses.
+ */
+static int
+dht_do_fresh_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+ int ret = -1;
+ dht_conf_t *conf = NULL;
+ xlator_t *hashed_subvol = NULL;
+ dht_local_t *local = NULL;
+ int op_errno = -1;
+ int call_cnt = 0;
+ int i = 0;
+
+ conf = this->private;
+ if (!conf) {
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local = frame->local;
+ if (!local) {
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ /* Since we don't know whether this is a file or a directory,
+ * request all xattrs*/
+ ret = dht_set_file_xattr_req(this, loc, local->xattr_req);
+ if (ret) {
+ op_errno = -ret;
+ goto err;
+ }
+
+ ret = dht_set_dir_xattr_req(this, loc, local->xattr_req);
+ if (ret) {
+ op_errno = -ret;
+ goto err;
+ }
+
+ /* Fuse sets a random value in gfid-req. If the gfid is missing
+ * on one or more subvols, posix will set the gfid to this value,
+ * causing GFID mismatches for directories. Remove the value fuse
+ * has sent before sending the lookup.
+ */
+ ret = dict_get_gfuuid(local->xattr_req, "gfid-req", &local->gfid_req);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "%s: No gfid-req available", loc->path);
+ } else {
+ dict_del(local->xattr_req, "gfid-req");
+ }
+ /* This should have been set in dht_lookup */
+ hashed_subvol = local->hashed_subvol;
+
+ if (!hashed_subvol) {
+ gf_msg_debug(this->name, 0,
+ "%s: no subvolume in layout for path, "
+ "checking on all the subvols to see if "
+ "it is a directory",
+ loc->path);
- return 0;
- }
- }
+ call_cnt = conf->subvolume_cnt;
+ local->call_cnt = call_cnt;
+
+ /* Allocate a layout. This will be populated and saved in
+ * the dht inode_ctx on successful lookup
+ */
+ local->layout = dht_layout_new(this, conf->subvolume_cnt);
+ if (!local->layout) {
+ op_errno = ENOMEM;
+ goto err;
}
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- dht_lookup_everywhere_done (frame, this);
+ gf_msg_debug(this->name, 0,
+ "%s: Found null hashed subvol. Calling lookup"
+ " on all nodes.",
+ loc->path);
+
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND_COOKIE(frame, dht_lookup_dir_cbk, conf->subvolumes[i],
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup, &local->loc,
+ local->xattr_req);
}
+ return 0;
+ }
-out:
- return ret;
-}
+ /* if the hashed_subvol is non-null, send the lookup there first so
+ * as to see whether we have a file or a directory */
+ gf_msg_debug(this->name, 0, "%s: Calling fresh lookup on %s", loc->path,
+ hashed_subvol->name);
+ STACK_WIND_COOKIE(frame, dht_lookup_cbk, hashed_subvol, hashed_subvol,
+ hashed_subvol->fops->lookup, loc, local->xattr_req);
+ return 0;
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ return 0;
+}
-int
-dht_lookup_everywhere (call_frame_t *frame, xlator_t *this, loc_t *loc)
+static int
+dht_do_revalidate(call_frame_t *frame, xlator_t *this, loc_t *loc)
{
- dht_conf_t *conf = NULL;
- dht_local_t *local = NULL;
- int i = 0;
- int call_cnt = 0;
+ xlator_t *subvol = NULL;
+ xlator_t *mds_subvol = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
+ int op_errno = -1;
+ dht_layout_t *layout = NULL;
+ int i = 0;
+ int call_cnt = 0;
+ int gen = 0;
+
+ conf = this->private;
+ if (!conf) {
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local = frame->local;
+ if (!local) {
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ layout = local->layout;
+ if (!layout) {
+ gf_msg_debug(this->name, 0,
+ "path = %s. No layout found in the inode ctx.", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ /* Generation number has changed. This layout may be stale. */
+ if (layout->gen && (layout->gen < conf->gen)) {
+ gen = layout->gen;
+ dht_layout_unref(this, local->layout);
+ local->layout = NULL;
+ local->cached_subvol = NULL;
+
+ gf_msg_debug(this->name, 0,
+ "path = %s. In memory layout may be stale."
+ "(layout->gen (%d) is less than "
+ "conf->gen (%d)). Calling fresh lookup.",
+ loc->path, gen, conf->gen);
+
+ dht_do_fresh_lookup(frame, this, loc);
+ return 0;
+ }
+
+ local->inode = inode_ref(loc->inode);
+
+ /* Since we don't know whether this has changed,
+ * request all xattrs*/
+ ret = dht_set_file_xattr_req(this, loc, local->xattr_req);
+ if (ret) {
+ op_errno = -ret;
+ goto err;
+ }
+
+ ret = dht_set_dir_xattr_req(this, loc, local->xattr_req);
+ if (ret) {
+ op_errno = -ret;
+ goto err;
+ }
+
+ if (IA_ISDIR(local->inode->ia_type)) {
+ ret = dht_inode_ctx_mdsvol_get(local->inode, this, &mds_subvol);
+ if (ret || !mds_subvol) {
+ gf_msg_debug(this->name, 0, "path = %s. No mds subvol in inode ctx",
+ local->loc.path);
+ }
+ local->mds_subvol = mds_subvol;
+ local->call_cnt = conf->subvolume_cnt;
- GF_VALIDATE_OR_GOTO ("dht", frame, err);
- GF_VALIDATE_OR_GOTO ("dht", this, out);
- GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
- GF_VALIDATE_OR_GOTO ("dht", this->private, out);
- GF_VALIDATE_OR_GOTO ("dht", loc, out);
+ /* local->call_cnt will change as responses are processed. Always use a
+ * local copy to loop through the STACK_WIND calls
+ */
- conf = this->private;
- local = frame->local;
+ call_cnt = local->call_cnt;
- call_cnt = conf->subvolume_cnt;
- local->call_cnt = call_cnt;
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND_COOKIE(frame, dht_revalidate_cbk, conf->subvolumes[i],
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup, loc,
+ local->xattr_req);
+ }
+ return 0;
+ }
- if (!local->inode)
- local->inode = inode_ref (loc->inode);
+ /* If not a dir, this should be 1 */
+ local->call_cnt = layout->cnt;
+ call_cnt = local->call_cnt;
- gf_msg_debug (this->name, 0,
- "winding lookup call to %d subvols", call_cnt);
+ for (i = 0; i < call_cnt; i++) {
+ subvol = layout->list[i].xlator;
- for (i = 0; i < call_cnt; i++) {
- STACK_WIND (frame, dht_lookup_everywhere_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->lookup,
- loc, local->xattr_req);
- }
+ gf_msg_debug(this->name, 0,
+ "path = %s. Calling "
+ "revalidate lookup on %s",
+ loc->path, subvol->name);
- return 0;
-out:
- DHT_STACK_UNWIND (lookup, frame, -1, EINVAL, NULL, NULL, NULL, NULL);
+ STACK_WIND_COOKIE(frame, dht_revalidate_cbk, subvol, subvol,
+ subvol->fops->lookup, &local->loc, local->xattr_req);
+ }
+ return 0;
err:
- return -1;
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ return 0;
}
+/* Depending on the input, decide if this is a:
+ * fresh-lookup: loc->name is provided but no dht inode ctx
+ * revalidation: loc->name is provided, dht inode ctx is present
+ * discover: gfid based nameless lookup.
+ */
int
-dht_lookup_linkfile_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int op_ret, int op_errno,
- inode_t *inode, struct iatt *stbuf, dict_t *xattr,
- struct iatt *postparent)
-{
- call_frame_t *prev = NULL;
- dht_local_t *local = NULL;
- xlator_t *subvol = NULL;
- loc_t *loc = NULL;
- dht_conf_t *conf = NULL;
- int ret = 0;
- char gfid[GF_UUID_BUF_SIZE] = {0};
-
- GF_VALIDATE_OR_GOTO ("dht", frame, out);
- GF_VALIDATE_OR_GOTO ("dht", this, unwind);
- GF_VALIDATE_OR_GOTO ("dht", frame->local, unwind);
- GF_VALIDATE_OR_GOTO ("dht", this->private, unwind);
- GF_VALIDATE_OR_GOTO ("dht", cookie, unwind);
-
- prev = cookie;
- subvol = prev->this;
- conf = this->private;
- local = frame->local;
- loc = &local->loc;
+dht_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
+{
+ xlator_t *hashed_subvol = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
+ int op_errno = -1;
+ loc_t new_loc = {
+ 0,
+ };
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+ VALIDATE_OR_GOTO(loc->inode, err);
+
+ conf = this->private;
+ if (!conf)
+ goto err;
+
+ local = dht_local_init(frame, loc, NULL, GF_FOP_LOOKUP);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ ret = dht_filter_loc_subvol_key(this, loc, &new_loc, &hashed_subvol);
+ if (ret) {
+ loc_wipe(&local->loc);
+ ret = loc_dup(&new_loc, &local->loc);
+
+ /* we no longer need 'new_loc' entries */
+ loc_wipe(&new_loc);
+
+ /* check if loc_dup() is successful */
+ if (ret == -1) {
+ op_errno = errno;
+ gf_msg_debug(this->name, errno,
+ "copying location failed for path=%s", loc->path);
+ goto err;
+ }
+ }
+
+ if (xattr_req) {
+ local->xattr_req = dict_ref(xattr_req);
+ } else {
+ local->xattr_req = dict_new();
+ }
+
+ /* Nameless lookup */
+
+ /* This is usually sent by NFS. Lookups are done based on the gfid and
+ * no name information is available. Without the name, dht cannot calculate
+ * the hash and has to send a lookup to all subvols.
+ */
+ if (gf_uuid_is_null(loc->pargfid) && !gf_uuid_is_null(loc->gfid) &&
+ !__is_root_gfid(loc->inode->gfid)) {
+ local->cached_subvol = NULL;
+ dht_do_discover(frame, this, loc);
+ return 0;
+ }
- gf_uuid_unparse(loc->gfid, gfid);
+ if (loc_is_root(loc)) {
+ /* Request the DHT commit hash xattr (trusted.glusterfs.dht.commithash)
+ * set on the brick root.
+ */
+ ret = dict_set_uint32(local->xattr_req, conf->commithash_xattr_name,
+ sizeof(uint32_t));
+ }
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_INFO,
- "Lookup of %s on %s (following linkfile) failed (%s)"
- ",gfid = %s", local->loc.path, subvol->name,
- strerror (op_errno), gfid);
-
- /* If cached subvol returned ENOTCONN, do not do
- lookup_everywhere. We need to make sure linkfile does not get
- removed, which can take away the namespace, and subvol is
- anyways down. */
-
- if (op_errno != ENOTCONN)
- goto err;
- else
- goto unwind;
- }
+ if (!hashed_subvol)
+ hashed_subvol = dht_subvol_get_hashed(this, loc);
+ local->hashed_subvol = hashed_subvol;
- if (check_is_dir (inode, stbuf, xattr)) {
- gf_log (this->name, GF_LOG_INFO,
- "Lookup of %s on %s (following linkfile) reached dir,"
- " gfid = %s", local->loc.path, subvol->name, gfid);
- goto err;
- }
+ if (is_revalidate(loc)) {
+ /* The entry has been looked up before and has a dht inode_ctx
+ */
+ dht_do_revalidate(frame, this, loc);
+ return 0;
+ } else {
+ /* Entry has not been looked up before
+ */
+ dht_do_fresh_lookup(frame, this, loc);
+ return 0;
+ }
- if (check_is_linkfile (inode, stbuf, xattr, conf->link_xattr_name)) {
- gf_log (this->name, GF_LOG_INFO,
- "lookup of %s on %s (following linkfile) reached link,"
- "gfid = %s", local->loc.path, subvol->name, gfid);
- goto err;
- }
+ return 0;
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ return 0;
+}
- if (gf_uuid_compare (local->gfid, stbuf->ia_gfid)) {
- gf_msg (this->name, GF_LOG_WARNING, 0,
- DHT_MSG_GFID_MISMATCH,
- "%s: gfid different on data file on %s,"
- " gfid local = %s, gfid node = %s ",
- local->loc.path, subvol->name, gfid,
- uuid_utoa(stbuf->ia_gfid));
- goto err;
- }
+static int
+dht_unlink_linkfile_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
- if ((stbuf->ia_nlink == 1)
- && (conf && conf->unhashed_sticky_bit)) {
- stbuf->ia_prot.sticky = 1;
- }
+ local = frame->local;
+ prev = cookie;
- ret = dht_layout_preset (this, prev->this, inode);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "Failed to set layout for subvolume %s,"
- "gfid = %s", prev->this->name, gfid);
- op_ret = -1;
- op_errno = EINVAL;
+ LOCK(&frame->lock);
+ {
+ if ((op_ret == -1) &&
+ !((op_errno == ENOENT) || (op_errno == ENOTCONN))) {
+ local->op_errno = op_errno;
+ UNLOCK(&frame->lock);
+ gf_msg_debug(this->name, op_errno,
+ "Unlink link: subvolume %s returned -1", prev->name);
+ goto post_unlock;
}
- if (local->loc.parent) {
- dht_inode_ctx_time_update (local->loc.parent, this,
- postparent, 1);
+ local->op_ret = 0;
+ }
+ UNLOCK(&frame->lock);
+post_unlock:
+ dht_set_fixed_dir_stat(&local->preparent);
+ dht_set_fixed_dir_stat(&local->postparent);
+ DHT_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno,
+ &local->preparent, &local->postparent, xdata);
+
+ return 0;
+}
+
+static int
+dht_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
+ xlator_t *hashed_subvol = NULL;
+
+ local = frame->local;
+ prev = cookie;
+
+ LOCK(&frame->lock);
+ {
+ if (op_ret == -1) {
+ if (op_errno != ENOENT) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ } else {
+ local->op_ret = 0;
+ }
+ UNLOCK(&frame->lock);
+ gf_msg_debug(this->name, op_errno,
+ "Unlink: subvolume %s returned -1", prev->name);
+ goto post_unlock;
}
-unwind:
- DHT_STRIP_PHASE1_FLAGS (stbuf);
- DHT_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, stbuf, xattr,
- postparent);
+ local->op_ret = 0;
- return 0;
+ local->postparent = *postparent;
+ local->preparent = *preparent;
-err:
- dht_lookup_everywhere (frame, this, loc);
-out:
- return 0;
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update(local->loc.parent, this,
+ &local->preparent, 0);
+ dht_inode_ctx_time_update(local->loc.parent, this,
+ &local->postparent, 1);
+ }
+ }
+ UNLOCK(&frame->lock);
+post_unlock:
+ if (!local->op_ret) {
+ hashed_subvol = dht_subvol_get_hashed(this, &local->loc);
+ if (hashed_subvol && hashed_subvol != local->cached_subvol) {
+ /*
+ * If hashed and cached are different, then we need
+ * to unlink linkfile from hashed subvol if data
+ * file is deleted successfully
+ */
+ STACK_WIND_COOKIE(frame, dht_unlink_linkfile_cbk, hashed_subvol,
+ hashed_subvol, hashed_subvol->fops->unlink,
+ &local->loc, local->flags, xdata);
+ return 0;
+ }
+ }
+
+ dht_set_fixed_dir_stat(&local->preparent);
+ dht_set_fixed_dir_stat(&local->postparent);
+ DHT_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno,
+ &local->preparent, &local->postparent, xdata);
+
+ return 0;
}
-
-int
-dht_lookup_directory (call_frame_t *frame, xlator_t *this, loc_t *loc)
+static int
+dht_common_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- int call_cnt = 0;
- int i = 0;
- dht_conf_t *conf = NULL;
- dht_local_t *local = NULL;
- int ret = 0;
+ DHT_STACK_UNWIND(setxattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
- GF_VALIDATE_OR_GOTO ("dht", frame, out);
- GF_VALIDATE_OR_GOTO ("dht", this, unwind);
- GF_VALIDATE_OR_GOTO ("dht", frame->local, unwind);
- GF_VALIDATE_OR_GOTO ("dht", this->private, unwind);
- GF_VALIDATE_OR_GOTO ("dht", loc, unwind);
+static int
+dht_fix_layout_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
- conf = this->private;
+ if (op_ret == 0) {
+ /* update the layout in the inode ctx */
local = frame->local;
+ layout = local->selfheal.layout;
- call_cnt = conf->subvolume_cnt;
- local->call_cnt = call_cnt;
+ dht_layout_set(this, local->loc.inode, layout);
+ }
- local->layout = dht_layout_new (this, conf->subvolume_cnt);
- if (!local->layout) {
- goto unwind;
- }
+ DHT_STACK_UNWIND(setxattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
- if (local->xattr != NULL) {
- dict_unref (local->xattr);
- local->xattr = NULL;
- }
+static int
+dht_err_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ xlator_t *prev = NULL;
- if (!gf_uuid_is_null (local->gfid)) {
- ret = dict_set_static_bin (local->xattr_req, "gfid-req",
- local->gfid, 16);
- if (ret)
- gf_msg (this->name, GF_LOG_WARNING, 0,
- DHT_MSG_DICT_SET_FAILED,
- "%s: Failed to set dictionary value:"
- " key = gfid-req", local->loc.path);
- }
+ local = frame->local;
+ prev = cookie;
- for (i = 0; i < call_cnt; i++) {
- STACK_WIND (frame, dht_lookup_dir_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->lookup,
- &local->loc, local->xattr_req);
+ LOCK(&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ UNLOCK(&frame->lock);
+ gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
+ prev->name);
+ goto post_unlock;
}
- return 0;
-unwind:
- DHT_STACK_UNWIND (lookup, frame, -1, ENOMEM, NULL, NULL, NULL, NULL);
+
+ local->op_ret = 0;
+ }
+ UNLOCK(&frame->lock);
+post_unlock:
+ this_call_cnt = dht_frame_return(frame);
+ if (is_last_call(this_call_cnt)) {
+ if ((local->fop == GF_FOP_SETXATTR) ||
+ (local->fop == GF_FOP_FSETXATTR)) {
+ DHT_STACK_UNWIND(setxattr, frame, local->op_ret, local->op_errno,
+ NULL);
+ /* 'local' itself may not be valid after this */
+ goto out;
+ }
+ if ((local->fop == GF_FOP_REMOVEXATTR) ||
+ (local->fop == GF_FOP_FREMOVEXATTR)) {
+ DHT_STACK_UNWIND(removexattr, frame, local->op_ret, local->op_errno,
+ NULL);
+ }
+ }
+
out:
- return 0;
+ return 0;
+}
+/* Set the value[] of key into dict after convert from
+ host byte order to network byte order
+*/
+int32_t
+dht_dict_set_array(dict_t *dict, char *key, int32_t value[], int32_t size)
+{
+ int ret = -1;
+ int32_t *ptr = NULL;
+ int32_t vindex;
+
+ if (value == NULL) {
+ return -EINVAL;
+ }
+
+ ptr = GF_MALLOC(sizeof(int32_t) * size, gf_common_mt_char);
+ if (ptr == NULL) {
+ return -ENOMEM;
+ }
+ for (vindex = 0; vindex < size; vindex++) {
+ ptr[vindex] = hton32(value[vindex]);
+ }
+ ret = dict_set_bin(dict, key, ptr, sizeof(int32_t) * size);
+ if (ret)
+ GF_FREE(ptr);
+ return ret;
}
+static int
+dht_common_mds_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ call_frame_t *prev = cookie;
-int
-dht_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- inode_t *inode, struct iatt *stbuf, dict_t *xattr,
- struct iatt *postparent)
-{
- char is_linkfile = 0;
- char is_dir = 0;
- xlator_t *subvol = NULL;
- dht_conf_t *conf = NULL;
- dht_local_t *local = NULL;
- loc_t *loc = NULL;
- call_frame_t *prev = NULL;
- int ret = 0;
- dht_layout_t *parent_layout = NULL;
-
- GF_VALIDATE_OR_GOTO ("dht", frame, err);
- GF_VALIDATE_OR_GOTO ("dht", this, out);
- GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
- GF_VALIDATE_OR_GOTO ("dht", cookie, out);
- GF_VALIDATE_OR_GOTO ("dht", this->private, out);
-
- conf = this->private;
-
- prev = cookie;
- local = frame->local;
- loc = &local->loc;
+ local = frame->local;
- /* This is required for handling stale linkfile deletion,
- * or any more call which happens from this 'loc'.
- */
- if (!op_ret && gf_uuid_is_null (local->gfid))
- memcpy (local->gfid, stbuf->ia_gfid, 16);
-
- gf_msg_debug (this->name, 0,
- "fresh_lookup returned for %s with op_ret %d and "
- "op_errno %d", loc->path, op_ret, op_errno);
-
- if (ENTRY_MISSING (op_ret, op_errno)) {
- gf_msg_debug (this->name, 0,
- "Entry %s missing on subvol %s",
- loc->path, prev->this->name);
-
- if (conf->search_unhashed == GF_DHT_LOOKUP_UNHASHED_ON) {
- local->op_errno = ENOENT;
- dht_lookup_everywhere (frame, this, loc);
- return 0;
- }
- if ((conf->search_unhashed == GF_DHT_LOOKUP_UNHASHED_AUTO) &&
- (loc->parent)) {
- ret = dht_inode_ctx_layout_get (loc->parent, this,
- &parent_layout);
- if (ret || !parent_layout)
- goto out;
- if (parent_layout->search_unhashed) {
- local->op_errno = ENOENT;
- dht_lookup_everywhere (frame, this, loc);
- return 0;
- }
- }
+ if (op_ret)
+ gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
+ prev->this->name);
+
+ if (local->fop == GF_FOP_SETXATTR) {
+ DHT_STACK_UNWIND(setxattr, frame, 0, op_errno, local->xdata);
+ /* 'local' itself may not be valid after this */
+ goto out;
+ }
+
+ if (local->fop == GF_FOP_FSETXATTR) {
+ DHT_STACK_UNWIND(fsetxattr, frame, 0, op_errno, local->xdata);
+ /* 'local' itself may not be valid after this */
+ goto out;
+ }
+
+ if (local->fop == GF_FOP_REMOVEXATTR) {
+ DHT_STACK_UNWIND(removexattr, frame, 0, op_errno, NULL);
+ /* 'local' itself may not be valid after this */
+ goto out;
+ }
+
+ if (local->fop == GF_FOP_FREMOVEXATTR) {
+ DHT_STACK_UNWIND(fremovexattr, frame, 0, op_errno, NULL);
+ }
+
+out:
+ return 0;
+}
+
+/* Code to wind a xattrop call to add 1 on current mds internal xattr
+ value
+*/
+static int
+dht_setxattr_non_mds_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ int ret = 0;
+ dict_t *xattrop = NULL;
+ int32_t addone[1] = {1};
+ call_frame_t *prev = NULL;
+ dht_conf_t *conf = NULL;
+
+ local = frame->local;
+ prev = cookie;
+ conf = this->private;
+
+ LOCK(&frame->lock);
+ {
+ if (op_ret && !local->op_ret) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ UNLOCK(&frame->lock);
+ gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
+ prev->this->name);
+ goto post_unlock;
+ }
+ }
+ UNLOCK(&frame->lock);
+post_unlock:
+ this_call_cnt = dht_frame_return(frame);
+
+ if (is_last_call(this_call_cnt)) {
+ if (!local->op_ret) {
+ xattrop = dict_new();
+ if (!xattrop) {
+ gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, 0,
+ "dictionary creation failed");
+ ret = -1;
+ goto out;
+ }
+ ret = dht_dict_set_array(xattrop, conf->mds_xattr_key, addone, 1);
+ if (ret != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "dictionary set array failed ");
+ ret = -1;
+ goto out;
+ }
+ if ((local->fop == GF_FOP_SETXATTR) ||
+ (local->fop == GF_FOP_REMOVEXATTR)) {
+ STACK_WIND(frame, dht_common_mds_xattrop_cbk, local->mds_subvol,
+ local->mds_subvol->fops->xattrop, &local->loc,
+ GF_XATTROP_ADD_ARRAY, xattrop, NULL);
+ } else {
+ STACK_WIND(frame, dht_common_mds_xattrop_cbk, local->mds_subvol,
+ local->mds_subvol->fops->fxattrop, local->fd,
+ GF_XATTROP_ADD_ARRAY, xattrop, NULL);
+ }
+ } else {
+ if (local->fop == GF_FOP_SETXATTR) {
+ DHT_STACK_UNWIND(setxattr, frame, 0, 0, local->xdata);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
+ }
+
+ if (local->fop == GF_FOP_FSETXATTR) {
+ DHT_STACK_UNWIND(fsetxattr, frame, 0, 0, local->xdata);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
+ }
+
+ if (local->fop == GF_FOP_REMOVEXATTR) {
+ DHT_STACK_UNWIND(removexattr, frame, 0, 0, NULL);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
+ }
+
+ if (local->fop == GF_FOP_FREMOVEXATTR) {
+ DHT_STACK_UNWIND(fremovexattr, frame, 0, 0, NULL);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
+ }
+ }
+ }
+out:
+ if (ret) {
+ if (local->fop == GF_FOP_SETXATTR) {
+ DHT_STACK_UNWIND(setxattr, frame, 0, 0, local->xdata);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
}
- if (op_ret == 0) {
- is_dir = check_is_dir (inode, stbuf, xattr);
- if (is_dir) {
- local->inode = inode_ref (inode);
- local->xattr = dict_ref (xattr);
- }
+ if (local->fop == GF_FOP_FSETXATTR) {
+ DHT_STACK_UNWIND(fsetxattr, frame, 0, 0, local->xdata);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
}
- if (is_dir || (op_ret == -1 && op_errno == ENOTCONN)) {
- dht_lookup_directory (frame, this, &local->loc);
- return 0;
+ if (local->fop == GF_FOP_REMOVEXATTR) {
+ DHT_STACK_UNWIND(removexattr, frame, 0, 0, NULL);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
}
- if (op_ret == -1) {
- gf_msg_debug (this->name, 0, "Lookup of %s for subvolume"
- " %s failed with error %s", loc->path,
- prev->this->name, strerror (op_errno));
- goto out;
+ if (local->fop == GF_FOP_FREMOVEXATTR) {
+ DHT_STACK_UNWIND(fremovexattr, frame, 0, 0, NULL);
}
+ }
+just_return:
+ if (xattrop)
+ dict_unref(xattrop);
+ return 0;
+}
- is_linkfile = check_is_linkfile (inode, stbuf, xattr,
- conf->link_xattr_name);
+static int
+dht_setxattr_mds_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ call_frame_t *prev = NULL;
+ xlator_t *mds_subvol = NULL;
+ int i = 0;
+
+ local = frame->local;
+ prev = cookie;
+ conf = this->private;
+ mds_subvol = local->mds_subvol;
+
+ if (op_ret == -1) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
+ prev->this->name);
+ goto out;
+ }
- if (!is_linkfile) {
- /* non-directory and not a linkfile */
+ local->op_ret = 0;
+ local->call_cnt = conf->subvolume_cnt - 1;
+ local->xdata = dict_ref(xdata);
- ret = dht_layout_preset (this, prev->this, inode);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "could not set pre-set layout for subvolume %s",
- prev->this->name);
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
- goto out;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (mds_subvol && (mds_subvol == conf->subvolumes[i]))
+ continue;
+ if (local->fop == GF_FOP_SETXATTR) {
+ STACK_WIND(frame, dht_setxattr_non_mds_cbk, conf->subvolumes[i],
+ conf->subvolumes[i]->fops->setxattr, &local->loc,
+ local->xattr, local->flags, local->xattr_req);
}
- subvol = dht_linkfile_subvol (this, inode, stbuf, xattr);
- if (!subvol) {
+ if (local->fop == GF_FOP_FSETXATTR) {
+ STACK_WIND(frame, dht_setxattr_non_mds_cbk, conf->subvolumes[i],
+ conf->subvolumes[i]->fops->fsetxattr, local->fd,
+ local->xattr, local->flags, local->xattr_req);
+ }
- gf_log (this->name, GF_LOG_INFO, "linkfile not having link "
- "subvol for %s", loc->path);
+ if (local->fop == GF_FOP_REMOVEXATTR) {
+ STACK_WIND(frame, dht_setxattr_non_mds_cbk, conf->subvolumes[i],
+ conf->subvolumes[i]->fops->removexattr, &local->loc,
+ local->key, local->xattr_req);
+ }
- gf_msg_debug (this->name, 0,
- "linkfile not having link subvolume. path=%s",
- loc->path);
- dht_lookup_everywhere (frame, this, loc);
- return 0;
+ if (local->fop == GF_FOP_FREMOVEXATTR) {
+ STACK_WIND(frame, dht_setxattr_non_mds_cbk, conf->subvolumes[i],
+ conf->subvolumes[i]->fops->fremovexattr, local->fd,
+ local->key, local->xattr_req);
}
+ }
- gf_msg_debug (this->name, 0,
- "Calling lookup on linkto target %s for path %s",
- subvol->name, loc->path);
+ return 0;
+out:
+ if (local->fop == GF_FOP_SETXATTR) {
+ DHT_STACK_UNWIND(setxattr, frame, local->op_ret, local->op_errno,
+ xdata);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
+ }
+
+ if (local->fop == GF_FOP_FSETXATTR) {
+ DHT_STACK_UNWIND(fsetxattr, frame, local->op_ret, local->op_errno,
+ xdata);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
+ }
+
+ if (local->fop == GF_FOP_REMOVEXATTR) {
+ DHT_STACK_UNWIND(removexattr, frame, local->op_ret, local->op_errno,
+ NULL);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
+ }
+
+ if (local->fop == GF_FOP_FREMOVEXATTR) {
+ DHT_STACK_UNWIND(fremovexattr, frame, local->op_ret, local->op_errno,
+ NULL);
+ }
+
+just_return:
+ return 0;
+}
- STACK_WIND (frame, dht_lookup_linkfile_cbk,
- subvol, subvol->fops->lookup,
- &local->loc, local->xattr_req);
+static int
+dht_xattrop_mds_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *dict, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
- return 0;
+ local = frame->local;
+ prev = cookie;
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ local->op_ret = op_ret;
+ gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
+ prev->this->name);
+ goto out;
+ }
+
+ if (local->fop == GF_FOP_SETXATTR) {
+ STACK_WIND(frame, dht_setxattr_mds_cbk, local->mds_subvol,
+ local->mds_subvol->fops->setxattr, &local->loc, local->xattr,
+ local->flags, local->xattr_req);
+ }
+
+ if (local->fop == GF_FOP_FSETXATTR) {
+ STACK_WIND(frame, dht_setxattr_mds_cbk, local->mds_subvol,
+ local->mds_subvol->fops->fsetxattr, local->fd, local->xattr,
+ local->flags, local->xattr_req);
+ }
+
+ if (local->fop == GF_FOP_REMOVEXATTR) {
+ STACK_WIND(frame, dht_setxattr_mds_cbk, local->mds_subvol,
+ local->mds_subvol->fops->removexattr, &local->loc,
+ local->key, local->xattr_req);
+ }
+
+ if (local->fop == GF_FOP_FREMOVEXATTR) {
+ STACK_WIND(frame, dht_setxattr_mds_cbk, local->mds_subvol,
+ local->mds_subvol->fops->fremovexattr, local->fd, local->key,
+ local->xattr_req);
+ }
+
+ return 0;
out:
- /*
- * FIXME: postparent->ia_size and postparent->st_blocks do not have
- * correct values. since, postparent corresponds to a directory these
- * two members should have values equal to sum of corresponding values
- * from each of the subvolume. See dht_iatt_merge for reference.
- */
-
- if (!op_ret && local && local->loc.parent) {
- dht_inode_ctx_time_update (local->loc.parent, this,
- postparent, 1);
- }
+ if (local->fop == GF_FOP_SETXATTR) {
+ DHT_STACK_UNWIND(setxattr, frame, local->op_ret, local->op_errno,
+ xdata);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
+ }
+
+ if (local->fop == GF_FOP_FSETXATTR) {
+ DHT_STACK_UNWIND(fsetxattr, frame, local->op_ret, local->op_errno,
+ xdata);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
+ }
+
+ if (local->fop == GF_FOP_REMOVEXATTR) {
+ DHT_STACK_UNWIND(removexattr, frame, local->op_ret, local->op_errno,
+ NULL);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
+ }
+
+ if (local->fop == GF_FOP_FREMOVEXATTR) {
+ DHT_STACK_UNWIND(fremovexattr, frame, local->op_ret, local->op_errno,
+ NULL);
+ }
+
+just_return:
+ return 0;
+}
- DHT_STRIP_PHASE1_FLAGS (stbuf);
- DHT_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, stbuf, xattr,
- postparent);
-err:
- return 0;
+static void
+fill_layout_info(dht_layout_t *layout, char *buf)
+{
+ int i = 0;
+ char tmp_buf[128] = {
+ 0,
+ };
+
+ for (i = 0; i < layout->cnt; i++) {
+ snprintf(tmp_buf, sizeof(tmp_buf), "(%s %u %u)",
+ layout->list[i].xlator->name, layout->list[i].start,
+ layout->list[i].stop);
+ if (i)
+ strcat(buf, " ");
+ strcat(buf, tmp_buf);
+ }
}
-/* For directories, check if acl xattrs have been requested (by the acl xlator),
- * if not, request for them. These xattrs are needed for dht dir self-heal to
- * perform proper self-healing of dirs
- */
-void
-dht_check_and_set_acl_xattr_req (inode_t *inode, dict_t *xattr_req)
+static void
+dht_fill_pathinfo_xattr(xlator_t *this, dht_local_t *local, char *xattr_buf,
+ int32_t alloc_len, int flag, char *layout_buf)
{
- int ret = 0;
+ if (flag) {
+ if (local->xattr_val) {
+ snprintf(xattr_buf, alloc_len,
+ "((<" DHT_PATHINFO_HEADER "%s> %s) (%s-layout %s))",
+ this->name, local->xattr_val, this->name, layout_buf);
+ } else {
+ snprintf(xattr_buf, alloc_len, "(%s-layout %s)", this->name,
+ layout_buf);
+ }
+ } else if (local->xattr_val) {
+ snprintf(xattr_buf, alloc_len, "(<" DHT_PATHINFO_HEADER "%s> %s)",
+ this->name, local->xattr_val);
+ } else {
+ xattr_buf[0] = '\0';
+ }
+}
- GF_ASSERT (inode);
- GF_ASSERT (xattr_req);
+static int
+dht_vgetxattr_alloc_and_fill(dht_local_t *local, dict_t *xattr, xlator_t *this,
+ int op_errno)
+{
+ int ret = -1;
+ char *value = NULL;
+
+ ret = dict_get_str(xattr, local->xsel, &value);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, DHT_MSG_GET_XATTR_FAILED,
+ "Subvolume %s returned -1", this->name);
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ goto out;
+ }
- if (inode->ia_type != IA_IFDIR)
- return;
+ local->alloc_len += strlen(value);
- if (!dict_get (xattr_req, POSIX_ACL_ACCESS_XATTR)) {
- ret = dict_set_int8 (xattr_req, POSIX_ACL_ACCESS_XATTR, 0);
- if (ret)
- gf_msg (THIS->name, GF_LOG_WARNING, 0,
- DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value:key = %s",
- POSIX_ACL_ACCESS_XATTR);
+ if (!local->xattr_val) {
+ local->alloc_len += (SLEN(DHT_PATHINFO_HEADER) + 10);
+ local->xattr_val = GF_MALLOC(local->alloc_len, gf_common_mt_char);
+ if (!local->xattr_val) {
+ ret = -1;
+ goto out;
}
+ local->xattr_val[0] = '\0';
+ }
- if (!dict_get (xattr_req, POSIX_ACL_DEFAULT_XATTR)) {
- ret = dict_set_int8 (xattr_req, POSIX_ACL_DEFAULT_XATTR, 0);
- if (ret)
- gf_msg (THIS->name, GF_LOG_WARNING, 0,
- DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value:key = %s",
- POSIX_ACL_DEFAULT_XATTR);
+ int plen = strlen(local->xattr_val);
+ if (plen) {
+ /* extra byte(s) for \0 to be safe */
+ local->alloc_len += (plen + 2);
+ local->xattr_val = GF_REALLOC(local->xattr_val, local->alloc_len);
+ if (!local->xattr_val) {
+ ret = -1;
+ goto out;
}
+ }
- return;
+ (void)strcat(local->xattr_val, value);
+ (void)strcat(local->xattr_val, " ");
+ local->op_ret = 0;
+
+ ret = 0;
+
+out:
+ return ret;
}
-int
-dht_lookup (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *xattr_req)
-{
- xlator_t *subvol = NULL;
- xlator_t *hashed_subvol = NULL;
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- int ret = -1;
- int op_errno = -1;
- dht_layout_t *layout = NULL;
- int i = 0;
- int call_cnt = 0;
- loc_t new_loc = {0,};
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
-
- conf = this->private;
- if (!conf)
- goto err;
+static int
+dht_vgetxattr_fill_and_set(dht_local_t *local, dict_t **dict, xlator_t *this,
+ gf_boolean_t flag)
+{
+ int ret = -1;
+ char *xattr_buf = NULL;
+ char layout_buf[8192] = {
+ 0,
+ };
+
+ if (flag)
+ fill_layout_info(local->layout, layout_buf);
+
+ *dict = dict_new();
+ if (!*dict)
+ goto out;
+
+ local->xattr_val[strlen(local->xattr_val) - 1] = '\0';
+
+ /* we would need max this many bytes to create xattr string
+ * extra 40 bytes is just an estimated amount of additional
+ * space required as we include translator name and some
+ * spaces, brackets etc. when forming the pathinfo string.
+ *
+ * For node-uuid we just don't have all the pretty formatting,
+ * but since this is a generic routine for pathinfo & node-uuid
+ * we don't have conditional space allocation and try to be
+ * generic
+ */
+ local->alloc_len += (2 * strlen(this->name)) + strlen(layout_buf) + 40;
+ xattr_buf = GF_MALLOC(local->alloc_len, gf_common_mt_char);
+ if (!xattr_buf)
+ goto out;
+
+ if (XATTR_IS_PATHINFO(local->xsel)) {
+ (void)dht_fill_pathinfo_xattr(this, local, xattr_buf, local->alloc_len,
+ flag, layout_buf);
+ } else if ((XATTR_IS_NODE_UUID(local->xsel)) ||
+ (XATTR_IS_NODE_UUID_LIST(local->xsel))) {
+ (void)snprintf(xattr_buf, local->alloc_len, "%s", local->xattr_val);
+ } else {
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_GET_XATTR_FAILED,
+ "Unknown local->xsel (%s)", local->xsel);
+ GF_FREE(xattr_buf);
+ goto out;
+ }
+
+ ret = dict_set_dynstr(*dict, local->xsel, xattr_buf);
+ if (ret)
+ GF_FREE(xattr_buf);
+ GF_FREE(local->xattr_val);
- local = dht_local_init (frame, loc, NULL, GF_FOP_LOOKUP);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+out:
+ return ret;
+}
- ret = dht_filter_loc_subvol_key (this, loc, &new_loc,
- &hashed_subvol);
- if (ret) {
- loc_wipe (&local->loc);
- ret = loc_dup (&new_loc, &local->loc);
-
- /* we no more need 'new_loc' entries */
- loc_wipe (&new_loc);
-
- /* check if loc_dup() is successful */
- if (ret == -1) {
- op_errno = errno;
- gf_msg_debug (this->name, 0,
- "copying location failed for path=%s",
- loc->path);
- goto err;
- }
+static int
+dht_find_local_subvol_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr,
+ dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ xlator_t *prev = NULL;
+ int this_call_cnt = 0;
+ int ret = 0;
+ char *uuid_str = NULL;
+ char *uuid_list = NULL;
+ char *next_uuid_str = NULL;
+ char *saveptr = NULL;
+ uuid_t node_uuid = {
+ 0,
+ };
+ char *uuid_list_copy = NULL;
+ int count = 0;
+ int i = 0;
+ int index = 0;
+ int found = 0;
+ nodeuuid_info_t *tmp_ptr = NULL;
+
+ VALIDATE_OR_GOTO(frame, out);
+ VALIDATE_OR_GOTO(frame->local, out);
+
+ local = frame->local;
+ prev = cookie;
+ conf = this->private;
+
+ VALIDATE_OR_GOTO(conf->defrag, out);
+
+ gf_msg_debug(this->name, 0, "subvol %s returned", prev->name);
+
+ LOCK(&frame->lock);
+ {
+ this_call_cnt = --local->call_cnt;
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ UNLOCK(&frame->lock);
+ if (op_errno == ENODATA)
+ gf_msg_debug(this->name, 0, "failed to get node-uuid");
+ else
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_GET_XATTR_FAILED, "failed to get node-uuid");
+ goto post_unlock;
}
- if (xattr_req) {
- local->xattr_req = dict_ref (xattr_req);
- } else {
- local->xattr_req = dict_new ();
+ ret = dict_get_str(xattr, local->xsel, &uuid_list);
+
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_GET_FAILED,
+ "Failed to get %s", local->xsel);
+ local->op_ret = -1;
+ local->op_errno = EINVAL;
+ goto unlock;
}
- if (gf_uuid_is_null (loc->pargfid) && !gf_uuid_is_null (loc->gfid) &&
- !__is_root_gfid (loc->inode->gfid)) {
- local->cached_subvol = NULL;
- dht_discover (frame, this, loc);
- return 0;
+ /* As DHT will not know details of its child xlators
+ * we need to parse this twice to get the count first
+ * and allocate memory later.
+ */
+ count = 0;
+ index = conf->local_subvols_cnt;
+
+ uuid_list_copy = gf_strdup(uuid_list);
+ if (!uuid_list_copy)
+ goto unlock;
+
+ for (uuid_str = strtok_r(uuid_list, " ", &saveptr); uuid_str;
+ uuid_str = next_uuid_str) {
+ next_uuid_str = strtok_r(NULL, " ", &saveptr);
+ if (gf_uuid_parse(uuid_str, node_uuid)) {
+ local->op_ret = -1;
+ local->op_errno = EINVAL;
+ UNLOCK(&frame->lock);
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_UUID_PARSE_ERROR,
+ "Failed to parse uuid for %s", prev->name);
+ goto post_unlock;
+ }
+
+ count++;
+ if (gf_uuid_compare(node_uuid, conf->defrag->node_uuid)) {
+ gf_msg_debug(this->name, 0,
+ "subvol %s does not"
+ "belong to this node",
+ prev->name);
+ } else {
+ /* handle multiple bricks of the same replica
+ * on the same node */
+ if (found)
+ continue;
+ conf->local_subvols[(conf->local_subvols_cnt)++] = prev;
+ found = 1;
+ gf_msg_debug(this->name, 0,
+ "subvol %s belongs to"
+ " this node",
+ prev->name);
+ }
+ }
+
+ if (!found) {
+ local->op_ret = 0;
+ goto unlock;
+ }
+
+ conf->local_nodeuuids[index].count = count;
+ conf->local_nodeuuids[index].elements = GF_CALLOC(
+ count, sizeof(nodeuuid_info_t), 1);
+
+ /* The node-uuids are guaranteed to be returned in the same
+ * order as the bricks
+ * A null node-uuid is returned for a brick that is down.
+ */
+
+ saveptr = NULL;
+ i = 0;
+
+ for (uuid_str = strtok_r(uuid_list_copy, " ", &saveptr); uuid_str;
+ uuid_str = next_uuid_str) {
+ next_uuid_str = strtok_r(NULL, " ", &saveptr);
+ tmp_ptr = &(conf->local_nodeuuids[index].elements[i]);
+ gf_uuid_parse(uuid_str, tmp_ptr->uuid);
+
+ if (!gf_uuid_compare(tmp_ptr->uuid, conf->defrag->node_uuid)) {
+ tmp_ptr->info = REBAL_NODEUUID_MINE;
+ }
+ i++;
+ tmp_ptr = NULL;
}
+ }
- if (!hashed_subvol)
- hashed_subvol = dht_subvol_get_hashed (this, loc);
- local->hashed_subvol = hashed_subvol;
+ local->op_ret = 0;
+unlock:
+ UNLOCK(&frame->lock);
+post_unlock:
+ if (!is_last_call(this_call_cnt))
+ goto out;
- if (is_revalidate (loc)) {
- layout = local->layout;
- if (!layout) {
- gf_msg_debug (this->name, 0,
- "Revalidate lookup without cache."
- " path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ if (local->op_ret == -1) {
+ goto unwind;
+ }
- if (layout->gen && (layout->gen < conf->gen)) {
- gf_msg_trace (this->name, 0,
- "incomplete layout failure for path=%s",
- loc->path);
+ DHT_STACK_UNWIND(getxattr, frame, 0, 0, xattr, xdata);
+ goto out;
- dht_layout_unref (this, local->layout);
- local->layout = NULL;
- local->cached_subvol = NULL;
+unwind:
- gf_msg_debug(this->name, 0,
- "Called revalidate lookup for %s, "
- "but layout->gen (%d) is less than "
- "conf->gen (%d), calling fresh_lookup",
- loc->path, layout->gen, conf->gen);
+ GF_FREE(conf->local_nodeuuids[index].elements);
+ conf->local_nodeuuids[index].elements = NULL;
- goto do_fresh_lookup;
- }
+ DHT_STACK_UNWIND(getxattr, frame, -1, local->op_errno, NULL, xdata);
+out:
+ GF_FREE(uuid_list_copy);
+ return 0;
+}
- local->inode = inode_ref (loc->inode);
+static int
+dht_vgetxattr_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
+{
+ int ret = 0;
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ dict_t *dict = NULL;
- ret = dict_set_uint32 (local->xattr_req,
- conf->xattr_name, 4 * 4);
- if (ret) {
- gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
- DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value:key = %s for "
- "path %s", conf->xattr_name, loc->path);
- goto err;
- }
- /* need it in case file is not found on cached file
- * on revalidate path and we may encounter linkto files on
- * with dht_lookup_everywhere*/
- ret = dict_set_uint32 (local->xattr_req,
- conf->link_xattr_name, 256);
- if (ret < 0) {
- gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
- DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value:key = %s for "
- "path %s", conf->link_xattr_name, loc->path);
- goto err;
- }
- if (IA_ISDIR (local->inode->ia_type)) {
- local->call_cnt = call_cnt = conf->subvolume_cnt;
- for (i = 0; i < call_cnt; i++) {
- STACK_WIND (frame, dht_revalidate_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->lookup,
- loc, local->xattr_req);
- }
- return 0;
- }
+ VALIDATE_OR_GOTO(frame, out);
+ VALIDATE_OR_GOTO(frame->local, out);
- call_cnt = local->call_cnt = layout->cnt;
+ local = frame->local;
- /* need it for self-healing linkfiles which is
- 'in-migration' state */
- ret = dict_set_uint32 (local->xattr_req,
- GLUSTERFS_OPEN_FD_COUNT, 4);
- if (ret) {
- gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
- DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value:key = %s for "
- "path %s", GLUSTERFS_OPEN_FD_COUNT, loc->path);
- goto err;
- }
- /* need it for dir self-heal */
- dht_check_and_set_acl_xattr_req (loc->inode, local->xattr_req);
+ LOCK(&frame->lock);
+ {
+ this_call_cnt = --local->call_cnt;
+ if (op_ret < 0) {
+ if (op_errno != ENOTCONN) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ UNLOCK(&frame->lock);
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_GET_XATTR_FAILED, "getxattr err for dir");
+ goto post_unlock;
+ }
- for (i = 0; i < call_cnt; i++) {
- subvol = layout->list[i].xlator;
+ goto unlock;
+ }
- gf_msg_debug (this->name, 0, "calling "
- "revalidate lookup for %s at %s",
- loc->path, subvol->name);
+ ret = dht_vgetxattr_alloc_and_fill(local, xattr, this, op_errno);
+ if (ret) {
+ UNLOCK(&frame->lock);
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, DHT_MSG_DICT_SET_FAILED,
+ "alloc or fill failure");
+ goto post_unlock;
+ }
+ }
+unlock:
+ UNLOCK(&frame->lock);
+post_unlock:
+ if (!is_last_call(this_call_cnt))
+ goto out;
- STACK_WIND (frame, dht_revalidate_cbk,
- subvol, subvol->fops->lookup,
- &local->loc, local->xattr_req);
+ /* -- last call: do patch ups -- */
- }
- } else {
- do_fresh_lookup:
- /* TODO: remove the hard-coding */
- ret = dict_set_uint32 (local->xattr_req,
- conf->xattr_name, 4 * 4);
- if (ret) {
- gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
- DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value:key = %s for "
- "path %s", conf->xattr_name, loc->path);
- goto err;
- }
+ if (local->op_ret == -1) {
+ goto unwind;
+ }
- ret = dict_set_uint32 (local->xattr_req,
- conf->link_xattr_name, 256);
- if (ret) {
- gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
- DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value:key = %s for "
- "path %s", conf->link_xattr_name, loc->path);
- goto err;
- }
- /* need it for self-healing linkfiles which is
- 'in-migration' state */
- ret = dict_set_uint32 (local->xattr_req,
- GLUSTERFS_OPEN_FD_COUNT, 4);
- if (ret) {
- gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
- DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value:key = %s for "
- "path %s", GLUSTERFS_OPEN_FD_COUNT, loc->path);
- goto err;
- }
- /* need it for dir self-heal */
- dht_check_and_set_acl_xattr_req (loc->inode, local->xattr_req);
+ ret = dht_vgetxattr_fill_and_set(local, &dict, this, _gf_true);
+ if (ret)
+ goto unwind;
- if (!hashed_subvol) {
+ DHT_STACK_UNWIND(getxattr, frame, 0, 0, dict, xdata);
+ goto cleanup;
- gf_msg_debug (this->name, 0,
- "no subvolume in layout for path=%s, "
- "checking on all the subvols to see if "
- "it is a directory", loc->path);
+unwind:
+ DHT_STACK_UNWIND(getxattr, frame, -1, local->op_errno, NULL, NULL);
+cleanup:
+ if (dict)
+ dict_unref(dict);
+out:
+ return 0;
+}
- call_cnt = conf->subvolume_cnt;
- local->call_cnt = call_cnt;
+static int
+dht_vgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, dict_t *xattr, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int ret = 0;
+ dict_t *dict = NULL;
+ xlator_t *prev = NULL;
+ gf_boolean_t flag = _gf_true;
- local->layout = dht_layout_new (this,
- conf->subvolume_cnt);
- if (!local->layout) {
- op_errno = ENOMEM;
- goto err;
- }
+ local = frame->local;
+ prev = cookie;
- gf_msg_debug (this->name, 0,
- "Found null hashed subvol. Calling lookup"
- " on all nodes.");
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, DHT_MSG_GET_XATTR_FAILED,
+ "vgetxattr: Subvolume %s returned -1", prev->name);
+ goto unwind;
+ }
- for (i = 0; i < call_cnt; i++) {
- STACK_WIND (frame, dht_lookup_dir_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->lookup,
- &local->loc, local->xattr_req);
- }
- return 0;
- }
+ ret = dht_vgetxattr_alloc_and_fill(local, xattr, this, op_errno);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_NO_MEMORY,
+ "Allocation or fill failure");
+ goto unwind;
+ }
+
+ flag = (local->layout->cnt > 1) ? _gf_true : _gf_false;
+
+ ret = dht_vgetxattr_fill_and_set(local, &dict, this, flag);
+ if (ret)
+ goto unwind;
+
+ DHT_STACK_UNWIND(getxattr, frame, 0, 0, dict, xdata);
+ goto cleanup;
+
+unwind:
+ DHT_STACK_UNWIND(getxattr, frame, -1, local->op_errno, NULL, NULL);
+cleanup:
+ if (dict)
+ dict_unref(dict);
+
+ return 0;
+}
- gf_msg_debug (this->name, 0, "Calling fresh lookup for %s on"
- " %s", loc->path, hashed_subvol->name);
+static int
+dht_linkinfo_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr,
+ dict_t *xdata)
+{
+ int ret = 0;
+ char *value = NULL;
- STACK_WIND (frame, dht_lookup_cbk,
- hashed_subvol, hashed_subvol->fops->lookup,
- loc, local->xattr_req);
+ if (op_ret != -1) {
+ ret = dict_get_str(xattr, GF_XATTR_PATHINFO_KEY, &value);
+ if (!ret) {
+ ret = dict_set_str(xattr, GF_XATTR_LINKINFO_KEY, value);
+ if (!ret)
+ gf_msg_trace(this->name, 0, "failed to set linkinfo");
}
+ }
- return 0;
+ DHT_STACK_UNWIND(getxattr, frame, op_ret, op_errno, xattr, xdata);
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL,
- NULL);
- return 0;
+ return 0;
}
+static int
+dht_mds_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(frame->local, err);
+ VALIDATE_OR_GOTO(this->private, err);
+
+ conf = this->private;
+ local = frame->local;
+
+ if (!xattr || (op_ret == -1)) {
+ local->op_ret = op_ret;
+ goto out;
+ }
+ dict_del(xattr, conf->xattr_name);
+ local->op_ret = 0;
+
+ if (!local->xattr) {
+ local->xattr = dict_copy_with_ref(xattr, NULL);
+ }
+
+out:
+ DHT_STACK_UNWIND(getxattr, frame, local->op_ret, op_errno, local->xattr,
+ xdata);
+ return 0;
+err:
+ DHT_STACK_UNWIND(getxattr, frame, -1, EINVAL, NULL, NULL);
+ return 0;
+}
int
-dht_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+dht_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, dict_t *xattr, dict_t *xdata)
{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
+ int this_call_cnt = 0;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int ret = 0;
- local = frame->local;
- prev = cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_ret = -1;
- local->op_errno = op_errno;
- gf_msg_debug (this->name, 0,
- "Unlink: subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
- goto unlock;
- }
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(frame->local, err);
+ VALIDATE_OR_GOTO(this->private, err);
- local->op_ret = 0;
+ conf = this->private;
+ local = frame->local;
- local->postparent = *postparent;
- local->preparent = *preparent;
+ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto err;
+ return 0;
+ }
- if (local->loc.parent) {
- dht_inode_ctx_time_update (local->loc.parent, this,
- &local->preparent, 0);
- dht_inode_ctx_time_update (local->loc.parent, this,
- &local->postparent, 1);
- }
+ LOCK(&frame->lock);
+ {
+ if (!xattr || (op_ret == -1)) {
+ local->op_ret = op_ret;
+ goto unlock;
+ }
+
+ dict_del(xattr, conf->xattr_name);
+ dict_del(xattr, conf->mds_xattr_key);
+
+ dict_del(xattr, conf->commithash_xattr_name);
+
+ if (frame->root->pid >= 0) {
+ GF_REMOVE_INTERNAL_XATTR("trusted.glusterfs.quota*", xattr);
+ GF_REMOVE_INTERNAL_XATTR("trusted.pgfid*", xattr);
}
+
+ local->op_ret = 0;
+
+ if (!local->xattr) {
+ local->xattr = dict_copy_with_ref(xattr, NULL);
+ } else {
+ dht_aggregate_xattr(local->xattr, xattr);
+ }
+
+ if (!local->xdata) {
+ local->xdata = dict_ref(xdata);
+ } else if ((local->inode && IA_ISDIR(local->inode->ia_type)) ||
+ (local->fd && IA_ISDIR(local->fd->inode->ia_type))) {
+ dht_aggregate_xattr(local->xdata, xdata);
+ }
+ }
unlock:
- UNLOCK (&frame->lock);
+ UNLOCK(&frame->lock);
- DHT_STACK_UNWIND (unlink, frame, local->op_ret, local->op_errno,
- &local->preparent, &local->postparent, NULL);
+ this_call_cnt = dht_frame_return(frame);
+ if (is_last_call(this_call_cnt)) {
+ /* If we have a valid xattr received from any one of the
+ * subvolume, let's return it */
+ if (local->xattr) {
+ local->op_ret = 0;
+ }
- return 0;
+ DHT_STACK_UNWIND(getxattr, frame, local->op_ret, op_errno, local->xattr,
+ local->xdata);
+ }
+ return 0;
+err:
+ DHT_STACK_UNWIND(getxattr, frame, -1, EINVAL, NULL, NULL);
+ return 0;
}
+static int32_t
+dht_getxattr_unwind(call_frame_t *frame, int op_ret, int op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ DHT_STACK_UNWIND(getxattr, frame, op_ret, op_errno, dict, xdata);
+ return 0;
+}
-int
-dht_unlink_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+static int
+dht_getxattr_get_real_filename_cbk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ dict_t *xattr, dict_t *xdata)
{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
+ int this_call_cnt = 0;
+ dht_local_t *local = NULL;
- xlator_t *cached_subvol = NULL;
+ local = frame->local;
- local = frame->local;
- prev = cookie;
-
- LOCK (&frame->lock);
- {
- if ((op_ret == -1) && !((op_errno == ENOENT) ||
- (op_errno == ENOTCONN))) {
- local->op_errno = op_errno;
- gf_msg_debug (this->name, 0,
- "Unlink link: subvolume %s"
- " returned -1 (%s)",
- prev->this->name, strerror (op_errno));
- goto unlock;
+ LOCK(&frame->lock);
+ {
+ if (local->op_errno == EOPNOTSUPP) {
+ /* Nothing to do here, we have already found
+ * a subvol which does not have the get_real_filename
+ * optimization. If condition is for simple logic.
+ */
+ goto unlock;
+ }
+
+ if (op_ret == -1) {
+ if (op_errno == EOPNOTSUPP) {
+ /* This subvol does not have the optimization.
+ * Better let the user know we don't support it.
+ * Remove previous results if any.
+ */
+
+ if (local->xattr) {
+ dict_unref(local->xattr);
+ local->xattr = NULL;
}
- local->op_ret = 0;
+ if (local->xattr_req) {
+ dict_unref(local->xattr_req);
+ local->xattr_req = NULL;
+ }
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ UNLOCK(&frame->lock);
+ gf_msg(this->name, GF_LOG_WARNING, op_errno,
+ DHT_MSG_UPGRADE_BRICKS,
+ "At least "
+ "one of the bricks does not support "
+ "this operation. Please upgrade all "
+ "bricks.");
+ goto post_unlock;
+ }
+
+ if (op_errno == ENOATTR) {
+ /* Do nothing, our defaults are set to this.
+ */
+ goto unlock;
+ }
+
+ /* This is a place holder for every other error
+ * case. I am not sure of how to interpret
+ * ENOTCONN etc. As of now, choosing to ignore
+ * down subvol and return a good result(if any)
+ * from other subvol.
+ */
+ UNLOCK(&frame->lock);
+ gf_msg(this->name, GF_LOG_WARNING, op_errno,
+ DHT_MSG_GET_XATTR_FAILED, "Failed to get real filename.");
+ goto post_unlock;
+ }
+
+ /* This subvol has the required file.
+ * There could be other subvols which have returned
+ * success already, choosing to return the latest good
+ * result.
+ */
+ if (local->xattr)
+ dict_unref(local->xattr);
+ local->xattr = dict_ref(xattr);
+
+ if (local->xattr_req) {
+ dict_unref(local->xattr_req);
+ local->xattr_req = NULL;
}
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
+
+ local->op_ret = op_ret;
+ local->op_errno = 0;
+ UNLOCK(&frame->lock);
+ gf_msg_debug(this->name, 0, "Found a matching file.");
+ goto post_unlock;
+ }
unlock:
- UNLOCK (&frame->lock);
+ UNLOCK(&frame->lock);
+post_unlock:
+ this_call_cnt = dht_frame_return(frame);
+ if (is_last_call(this_call_cnt)) {
+ DHT_STACK_UNWIND(getxattr, frame, local->op_ret, local->op_errno,
+ local->xattr, local->xattr_req);
+ }
+
+ return 0;
+}
- if (local->op_ret == -1)
- goto err;
+static int
+dht_getxattr_get_real_filename(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *key, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int i = 0;
+ dht_layout_t *layout = NULL;
+ int cnt = 0;
+ xlator_t *subvol = NULL;
- cached_subvol = dht_subvol_get_cached (this, local->loc.inode);
- if (!cached_subvol) {
- gf_msg_debug (this->name, 0,
- "no cached subvolume for path=%s",
- local->loc.path);
- local->op_errno = EINVAL;
- goto err;
- }
+ local = frame->local;
+ layout = local->layout;
- STACK_WIND (frame, dht_unlink_cbk,
- cached_subvol, cached_subvol->fops->unlink,
- &local->loc, local->flags, NULL);
+ cnt = local->call_cnt = layout->cnt;
- return 0;
+ local->op_ret = -1;
+ local->op_errno = ENOATTR;
-err:
- DHT_STACK_UNWIND (unlink, frame, -1, local->op_errno,
- NULL, NULL, NULL);
- return 0;
+ for (i = 0; i < cnt; i++) {
+ subvol = layout->list[i].xlator;
+ STACK_WIND(frame, dht_getxattr_get_real_filename_cbk, subvol,
+ subvol->fops->getxattr, loc, key, xdata);
+ }
+
+ return 0;
}
-int
-dht_err_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xdata)
+static int
+dht_marker_populate_args(call_frame_t *frame, int type, int *gauge,
+ xlator_t **subvols)
{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
+ dht_local_t *local = NULL;
+ int i = 0;
+ dht_layout_t *layout = NULL;
- local = frame->local;
- prev = cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_msg_debug (this->name, 0,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
- goto unlock;
- }
+ local = frame->local;
+ layout = local->layout;
- local->op_ret = 0;
- }
-unlock:
- UNLOCK (&frame->lock);
+ for (i = 0; i < layout->cnt; i++)
+ subvols[i] = layout->list[i].xlator;
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- DHT_STACK_UNWIND (setxattr, frame, local->op_ret,
- local->op_errno, NULL);
- }
+ return layout->cnt;
+}
- return 0;
+static int
+dht_is_debug_xattr_key(const char **array, char *key)
+{
+ int i = 0;
+
+ for (i = 0; array[i]; i++) {
+ if (fnmatch(array[i], key, FNM_NOESCAPE) == 0)
+ return i;
+ }
+
+ return -1;
}
-static void
-fill_layout_info (dht_layout_t *layout, char *buf)
+/* Note we already have frame->local initialised here*/
+
+static int
+dht_handle_debug_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *key)
{
- int i = 0;
- char tmp_buf[128] = {0,};
+ dht_local_t *local = NULL;
+ int ret = -1;
+ int op_errno = ENODATA;
+ char *value = NULL;
+ loc_t file_loc = {0};
+ const char *name = NULL;
+
+ local = frame->local;
+
+ if (dht_is_debug_xattr_key(dht_dbg_vxattrs, (char *)key) == -1) {
+ goto out;
+ }
+
+ local->xattr = dict_new();
+ if (!local->xattr) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ if (strncmp(key, DHT_DBG_HASHED_SUBVOL_KEY,
+ SLEN(DHT_DBG_HASHED_SUBVOL_KEY)) == 0) {
+ name = key + strlen(DHT_DBG_HASHED_SUBVOL_KEY);
+ if (strlen(name) == 0) {
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ ret = dht_build_child_loc(this, &file_loc, loc, (char *)name);
+ if (ret) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- for (i = 0; i < layout->cnt; i++) {
- snprintf (tmp_buf, 128, "(%s %u %u)",
- layout->list[i].xlator->name,
- layout->list[i].start,
- layout->list[i].stop);
- if (i)
- strcat (buf, " ");
- strcat (buf, tmp_buf);
+ local->hashed_subvol = dht_subvol_get_hashed(this, &file_loc);
+ if (local->hashed_subvol == NULL) {
+ op_errno = ENODATA;
+ goto out;
}
+
+ value = gf_strdup(local->hashed_subvol->name);
+ if (!value) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ ret = dict_set_dynstr(local->xattr, (char *)key, value);
+ if (ret < 0) {
+ op_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+ ret = 0;
+ goto out;
+ }
+
+out:
+ loc_wipe(&file_loc);
+ DHT_STACK_UNWIND(getxattr, frame, ret, op_errno, local->xattr, NULL);
+ return 0;
}
-void
-dht_fill_pathinfo_xattr (xlator_t *this, dht_local_t *local,
- char *xattr_buf, int32_t alloc_len,
- int flag, char *layout_buf)
-{
- if (flag && local->xattr_val)
- snprintf (xattr_buf, alloc_len,
- "((<"DHT_PATHINFO_HEADER"%s> %s) (%s-layout %s))",
- this->name, local->xattr_val, this->name,
- layout_buf);
- else if (local->xattr_val)
- snprintf (xattr_buf, alloc_len,
- "(<"DHT_PATHINFO_HEADER"%s> %s)",
- this->name, local->xattr_val);
- else if (flag)
- snprintf (xattr_buf, alloc_len, "(%s-layout %s)",
- this->name, layout_buf);
+/* Virtual Xattr which returns 1 if all subvols are up,
+ else returns 0. Geo-rep then uses this virtual xattr
+ after a fresh mount and starts the I/O.
+*/
+
+enum dht_vxattr_subvol {
+ DHT_VXATTR_SUBVOLS_UP = 1,
+ DHT_VXATTR_SUBVOLS_DOWN = 0,
+};
+
+int
+dht_vgetxattr_subvol_status(call_frame_t *frame, xlator_t *this,
+ const char *key)
+{
+ dht_local_t *local = NULL;
+ int ret = -1;
+ int op_errno = ENODATA;
+ int value = DHT_VXATTR_SUBVOLS_UP;
+ int i = 0;
+ dht_conf_t *conf = NULL;
+
+ conf = this->private;
+ local = frame->local;
+
+ if (!key) {
+ op_errno = EINVAL;
+ goto out;
+ }
+ local->xattr = dict_new();
+ if (!local->xattr) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (!conf->subvolume_status[i]) {
+ value = DHT_VXATTR_SUBVOLS_DOWN;
+ gf_msg_debug(this->name, 0, "subvol %s is down ",
+ conf->subvolumes[i]->name);
+ break;
+ }
+ }
+ ret = dict_set_int8(local->xattr, (char *)key, value);
+ if (ret < 0) {
+ op_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+ ret = 0;
+
+out:
+ DHT_STACK_UNWIND(getxattr, frame, ret, op_errno, local->xattr, NULL);
+ return 0;
}
int
-dht_vgetxattr_alloc_and_fill (dht_local_t *local, dict_t *xattr, xlator_t *this,
- int op_errno)
+dht_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key,
+ dict_t *xdata)
+#define DHT_IS_DIR(layout) (layout->cnt > 1)
{
- int ret = -1;
- char *value = NULL;
- int32_t plen = 0;
+ xlator_t *subvol = NULL;
+ xlator_t *hashed_subvol = NULL;
+ xlator_t *mds_subvol = NULL;
+ xlator_t *cached_subvol = NULL;
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int op_errno = -1;
+ int i = 0;
+ int cnt = 0;
+ char *node_uuid_key = NULL;
+ int ret = -1;
+
+ GF_CHECK_XATTR_KEY_AND_GOTO(key, IO_THREADS_QUEUE_SIZE_KEY, op_errno, err);
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+ VALIDATE_OR_GOTO(loc->inode, err);
+ VALIDATE_OR_GOTO(this->private, err);
+
+ conf = this->private;
+
+ local = dht_local_init(frame, loc, NULL, GF_FOP_GETXATTR);
+ if (!local) {
+ op_errno = ENOMEM;
+
+ goto err;
+ }
+
+ layout = local->layout;
+ if (!layout) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LAYOUT_NULL,
+ "Layout is NULL");
+ op_errno = ENOENT;
+ goto err;
+ }
+
+ /* skip over code which is irrelevant without a valid key */
+ if (!key)
+ goto no_key;
+
+ local->key = gf_strdup(key);
+ if (!local->key) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ if (strncmp(key, conf->mds_xattr_key, strlen(key)) == 0) {
+ op_errno = ENOTSUP;
+ goto err;
+ }
+
+ if (strncmp(key, DHT_SUBVOL_STATUS_KEY, SLEN(DHT_SUBVOL_STATUS_KEY)) == 0) {
+ dht_vgetxattr_subvol_status(frame, this, key);
+ return 0;
+ }
- ret = dict_get_str (xattr, local->xsel, &value);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "Subvolume %s returned -1 (%s)", this->name,
- strerror (op_errno));
- local->op_ret = -1;
- local->op_errno = op_errno;
- goto out;
+ /* skip over code which is irrelevant if !DHT_IS_DIR(layout) */
+ if (!DHT_IS_DIR(layout))
+ goto no_dht_is_dir;
+
+ if ((strncmp(key, GF_XATTR_GET_REAL_FILENAME_KEY,
+ SLEN(GF_XATTR_GET_REAL_FILENAME_KEY)) == 0) &&
+ DHT_IS_DIR(layout)) {
+ dht_getxattr_get_real_filename(frame, this, loc, key, xdata);
+ return 0;
+ }
+
+ if (!strcmp(key, GF_REBAL_FIND_LOCAL_SUBVOL)) {
+ ret = gf_asprintf(&node_uuid_key, "%s", GF_XATTR_LIST_NODE_UUIDS_KEY);
+ if (ret == -1 || !node_uuid_key) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_NO_MEMORY,
+ "Failed to copy node uuid key");
+ op_errno = ENOMEM;
+ goto err;
+ }
+ (void)snprintf(local->xsel, sizeof(local->xsel), "%s", node_uuid_key);
+ cnt = local->call_cnt = conf->subvolume_cnt;
+ for (i = 0; i < cnt; i++) {
+ STACK_WIND_COOKIE(frame, dht_find_local_subvol_cbk,
+ conf->subvolumes[i], conf->subvolumes[i],
+ conf->subvolumes[i]->fops->getxattr, loc,
+ node_uuid_key, xdata);
}
+ if (node_uuid_key)
+ GF_FREE(node_uuid_key);
+ return 0;
+ }
+
+ if (!strcmp(key, GF_REBAL_OLD_FIND_LOCAL_SUBVOL)) {
+ ret = gf_asprintf(&node_uuid_key, "%s", GF_XATTR_NODE_UUID_KEY);
+ if (ret == -1 || !node_uuid_key) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_NO_MEMORY,
+ "Failed to copy node uuid key");
+ op_errno = ENOMEM;
+ goto err;
+ }
+ (void)snprintf(local->xsel, sizeof(local->xsel), "%s", node_uuid_key);
+ cnt = local->call_cnt = conf->subvolume_cnt;
+ for (i = 0; i < cnt; i++) {
+ STACK_WIND_COOKIE(frame, dht_find_local_subvol_cbk,
+ conf->subvolumes[i], conf->subvolumes[i],
+ conf->subvolumes[i]->fops->getxattr, loc,
+ node_uuid_key, xdata);
+ }
+ if (node_uuid_key)
+ GF_FREE(node_uuid_key);
+ return 0;
+ }
+
+ /* for file use cached subvolume (obviously!): see if {}
+ * below
+ * for directory:
+ * wind to all subvolumes and exclude subvolumes which
+ * return ENOTCONN (in callback)
+ *
+ * NOTE: Don't trust inode here, as that may not be valid
+ * (until inode_link() happens)
+ */
+
+ if (XATTR_IS_PATHINFO(key) || (strcmp(key, GF_XATTR_NODE_UUID_KEY) == 0) ||
+ (strcmp(key, GF_XATTR_LIST_NODE_UUIDS_KEY) == 0)) {
+ (void)snprintf(local->xsel, sizeof(local->xsel), "%s", key);
+ cnt = local->call_cnt = layout->cnt;
+ for (i = 0; i < cnt; i++) {
+ subvol = layout->list[i].xlator;
+ STACK_WIND(frame, dht_vgetxattr_dir_cbk, subvol,
+ subvol->fops->getxattr, loc, key, xdata);
+ }
+ return 0;
+ }
- local->alloc_len += strlen(value);
+no_dht_is_dir:
+ /* node-uuid or pathinfo for files */
+ if (XATTR_IS_PATHINFO(key) || (strcmp(key, GF_XATTR_NODE_UUID_KEY) == 0)) {
+ cached_subvol = local->cached_subvol;
+ (void)snprintf(local->xsel, sizeof(local->xsel), "%s", key);
+ local->call_cnt = 1;
+ STACK_WIND_COOKIE(frame, dht_vgetxattr_cbk, cached_subvol,
+ cached_subvol, cached_subvol->fops->getxattr, loc,
+ key, xdata);
- if (!local->xattr_val) {
- local->alloc_len += (strlen (DHT_PATHINFO_HEADER) + 10);
- local->xattr_val = GF_CALLOC (local->alloc_len, sizeof (char),
- gf_common_mt_char);
- if (!local->xattr_val) {
- ret = -1;
- goto out;
- }
+ return 0;
+ }
+
+ if (strcmp(key, GF_XATTR_LINKINFO_KEY) == 0) {
+ hashed_subvol = dht_subvol_get_hashed(this, loc);
+ if (!hashed_subvol) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ "Failed to get hashed subvol for %s", loc->path);
+ op_errno = EINVAL;
+ goto err;
}
- if (local->xattr_val) {
- plen = strlen (local->xattr_val);
- if (plen) {
- /* extra byte(s) for \0 to be safe */
- local->alloc_len += (plen + 2);
- local->xattr_val = GF_REALLOC (local->xattr_val,
- local->alloc_len);
- if (!local->xattr_val) {
- ret = -1;
- goto out;
- }
- }
+ cached_subvol = dht_subvol_get_cached(this, loc->inode);
+ if (!cached_subvol) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_CACHED_SUBVOL_GET_FAILED,
+ "Failed to get cached subvol for %s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
- (void) strcat (local->xattr_val, value);
- (void) strcat (local->xattr_val, " ");
- local->op_ret = 0;
+ if (hashed_subvol == cached_subvol) {
+ op_errno = ENODATA;
+ goto err;
}
- ret = 0;
+ STACK_WIND(frame, dht_linkinfo_getxattr_cbk, hashed_subvol,
+ hashed_subvol->fops->getxattr, loc, GF_XATTR_PATHINFO_KEY,
+ xdata);
+ return 0;
+ }
- out:
- return ret;
+ if (dht_is_debug_xattr_key(dht_dbg_vxattrs, (char *)key) >= 0) {
+ dht_handle_debug_getxattr(frame, this, loc, key);
+ return 0;
+ }
+
+no_key:
+ if (cluster_handle_marker_getxattr(frame, loc, key, conf->vol_uuid,
+ dht_getxattr_unwind,
+ dht_marker_populate_args) == 0)
+ return 0;
+
+ if (DHT_IS_DIR(layout)) {
+ local->call_cnt = conf->subvolume_cnt;
+ cnt = conf->subvolume_cnt;
+ ret = dht_inode_ctx_mdsvol_get(loc->inode, this, &mds_subvol);
+ if (!mds_subvol) {
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ "Cannot determine MDS, fetching xattr %s randomly"
+ " from a subvol for path %s ",
+ key, loc->path);
+ } else {
+ /* TODO need to handle it, As of now we are
+ choosing availability instead of chossing
+ consistencty, in case of mds_subvol is
+ down winding a getxattr call on other subvol
+ and return xattr
+ */
+ local->mds_subvol = mds_subvol;
+ for (i = 0; i < cnt; i++) {
+ if (conf->subvolumes[i] == mds_subvol) {
+ if (!conf->subvolume_status[i]) {
+ gf_msg(this->name, GF_LOG_INFO, 0,
+ DHT_MSG_HASHED_SUBVOL_DOWN,
+ "MDS %s is down for path"
+ " path %s so fetching xattr "
+ "%s randomly from a subvol ",
+ local->mds_subvol->name, loc->path, key);
+ ret = 1;
+ }
+ }
+ }
+ }
+
+ if (!ret && key && local->mds_subvol && dht_match_xattr(key)) {
+ STACK_WIND(frame, dht_mds_getxattr_cbk, local->mds_subvol,
+ local->mds_subvol->fops->getxattr, loc, key, xdata);
+
+ return 0;
+ }
+ } else {
+ cnt = local->call_cnt = 1;
+ }
+
+ for (i = 0; i < cnt; i++) {
+ subvol = layout->list[i].xlator;
+ STACK_WIND(frame, dht_getxattr_cbk, subvol, subvol->fops->getxattr, loc,
+ key, xdata);
+ }
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(getxattr, frame, -1, op_errno, NULL, NULL);
+
+ return 0;
}
+#undef DHT_IS_DIR
int
-dht_vgetxattr_fill_and_set (dht_local_t *local, dict_t **dict, xlator_t *this,
- gf_boolean_t flag)
+dht_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *key,
+ dict_t *xdata)
{
- int ret = -1;
- char *xattr_buf = NULL;
- char layout_buf[8192] = {0,};
+ xlator_t *subvol = NULL;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int op_errno = -1;
+ int i = 0;
+ int cnt = 0;
+ xlator_t *mds_subvol = NULL;
+ int ret = -1;
+ dht_conf_t *conf = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
+ VALIDATE_OR_GOTO(fd->inode, err);
+ VALIDATE_OR_GOTO(this->private, err);
+
+ conf = this->private;
+
+ local = dht_local_init(frame, NULL, fd, GF_FOP_FGETXATTR);
+ if (!local) {
+ op_errno = ENOMEM;
+
+ goto err;
+ }
+
+ layout = local->layout;
+ if (!layout) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LAYOUT_NULL,
+ "Layout is NULL");
+ op_errno = ENOENT;
+ goto err;
+ }
+
+ if (key) {
+ local->key = gf_strdup(key);
+ if (!local->key) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ }
+
+ gf_uuid_unparse(fd->inode->gfid, gfid);
+
+ if ((fd->inode->ia_type == IA_IFDIR) && key &&
+ (strncmp(key, GF_XATTR_LOCKINFO_KEY, SLEN(GF_XATTR_LOCKINFO_KEY)) !=
+ 0)) {
+ local->call_cnt = conf->subvolume_cnt;
+ cnt = conf->subvolume_cnt;
+ ret = dht_inode_ctx_mdsvol_get(fd->inode, this, &mds_subvol);
+
+ if (!mds_subvol) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ "cannot determine MDS, fetching xattr %s "
+ " randomly from a subvol for gfid %s ",
+ key, gfid);
+ } else {
+ /* TODO need to handle it, As of now we are
+ choosing availability instead of chossing
+ consistencty, in case of hashed_subvol is
+ down winding a getxattr call on other subvol
+ and return xattr
+ */
+ local->mds_subvol = mds_subvol;
+ for (i = 0; i < cnt; i++) {
+ if (conf->subvolumes[i] == mds_subvol) {
+ if (!conf->subvolume_status[i]) {
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_HASHED_SUBVOL_DOWN,
+ "MDS subvolume %s is down"
+ " for gfid %s so fetching xattr "
+ " %s randomly from a subvol ",
+ local->mds_subvol->name, gfid, key);
+ ret = 1;
+ }
+ }
+ }
+ }
+
+ if (!ret && key && local->mds_subvol && dht_match_xattr(key)) {
+ STACK_WIND(frame, dht_mds_getxattr_cbk, local->mds_subvol,
+ local->mds_subvol->fops->fgetxattr, fd, key, NULL);
+
+ return 0;
+ }
+
+ } else {
+ cnt = local->call_cnt = 1;
+ }
+
+ for (i = 0; i < cnt; i++) {
+ subvol = layout->list[i].xlator;
+ STACK_WIND(frame, dht_getxattr_cbk, subvol, subvol->fops->fgetxattr, fd,
+ key, NULL);
+ }
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(fgetxattr, frame, -1, op_errno, NULL, NULL);
- if (flag)
- fill_layout_info (local->layout, layout_buf);
+ return 0;
+}
- *dict = dict_new ();
- if (!*dict)
- goto out;
+static int
+dht_setxattr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
+{
+ dht_local_t *local = NULL;
+ int op_errno = EINVAL;
- local->xattr_val[strlen (local->xattr_val) - 1] = '\0';
+ if (!frame || !frame->local)
+ goto err;
- /* we would need max this many bytes to create xattr string
- * extra 40 bytes is just an estimated amount of additional
- * space required as we include translator name and some
- * spaces, brackets etc. when forming the pathinfo string.
- *
- * For node-uuid we just don't have all the pretty formatting,
- * but since this is a generic routine for pathinfo & node-uuid
- * we dont have conditional space allocation and try to be
- * generic
+ local = frame->local;
+ op_errno = local->op_errno;
+
+ if (we_are_not_migrating(ret)) {
+ /* This dht xlator is not migrating the file. Unwind and
+ * pass on the original mode bits so the higher DHT layer
+ * can handle this.
*/
- local->alloc_len += (2 * strlen (this->name))
- + strlen (layout_buf)
- + 40;
- xattr_buf = GF_CALLOC (local->alloc_len, sizeof (char),
- gf_common_mt_char);
- if (!xattr_buf)
- goto out;
+ DHT_STACK_UNWIND(setxattr, frame, local->op_ret, local->op_errno,
+ local->rebalance.xdata);
+ return 0;
+ }
- if (XATTR_IS_PATHINFO (local->xsel)) {
- (void) dht_fill_pathinfo_xattr (this, local, xattr_buf,
- local->alloc_len, flag,
- layout_buf);
- } else if (XATTR_IS_NODE_UUID (local->xsel)) {
- (void) snprintf (xattr_buf, local->alloc_len, "%s",
- local->xattr_val);
- } else {
- gf_log (this->name, GF_LOG_WARNING,
- "Unknown local->xsel (%s)", local->xsel);
- GF_FREE (xattr_buf);
- goto out;
- }
+ if (subvol == NULL)
+ goto err;
- ret = dict_set_dynstr (*dict, local->xsel, xattr_buf);
- if (ret)
- GF_FREE (xattr_buf);
- GF_FREE (local->xattr_val);
+ local->call_cnt = 2; /* This is the second attempt */
- out:
- return ret;
+ if (local->fop == GF_FOP_SETXATTR) {
+ STACK_WIND_COOKIE(frame, dht_file_setxattr_cbk, subvol, subvol,
+ subvol->fops->setxattr, &local->loc,
+ local->rebalance.xattr, local->rebalance.flags,
+ local->xattr_req);
+ } else {
+ STACK_WIND_COOKIE(frame, dht_file_setxattr_cbk, subvol, subvol,
+ subvol->fops->fsetxattr, local->fd,
+ local->rebalance.xattr, local->rebalance.flags,
+ local->xattr_req);
+ }
+
+ return 0;
+
+err:
+ DHT_STACK_UNWIND(setxattr, frame, (local ? local->op_ret : -1), op_errno,
+ NULL);
+ return 0;
}
int
-dht_vgetxattr_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
+dht_file_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
{
- int ret = 0;
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- dict_t *dict = NULL;
+ int ret = -1;
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
+ struct iatt *stbuf = NULL;
+ inode_t *inode = NULL;
+ xlator_t *subvol1 = NULL, *subvol2 = NULL;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (frame->local, out);
+ local = frame->local;
+ prev = cookie;
- local = frame->local;
+ local->op_errno = op_errno;
- LOCK (&frame->lock);
- {
- this_call_cnt = --local->call_cnt;
- if (op_ret < 0) {
- if (op_errno != ENOTCONN) {
- gf_log (this->name, GF_LOG_ERROR,
- "getxattr err (%s) for dir",
- strerror (op_errno));
- local->op_ret = -1;
- local->op_errno = op_errno;
- }
+ if ((local->fop == GF_FOP_FSETXATTR) &&
+ dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto out;
+ return 0;
+ }
- goto unlock;
- }
+ if ((op_ret == -1) && !dht_inode_missing(op_errno)) {
+ gf_msg_debug(this->name, op_errno, "subvolume %s returned -1.",
+ prev->name);
+ goto out;
+ }
- ret = dht_vgetxattr_alloc_and_fill (local, xattr, this,
- op_errno);
- if (ret)
- gf_log (this->name, GF_LOG_ERROR,
- "alloc or fill failure");
- }
- unlock:
- UNLOCK (&frame->lock);
+ if (local->call_cnt != 1)
+ goto out;
- if (!is_last_call (this_call_cnt))
- goto out;
+ ret = dict_get_bin(xdata, DHT_IATT_IN_XDATA_KEY, (void **)&stbuf);
- /* -- last call: do patch ups -- */
+ if ((!op_ret) && !stbuf) {
+ goto out;
+ }
- if (local->op_ret == -1) {
- goto unwind;
+ local->op_ret = op_ret;
+ local->rebalance.target_op_fn = dht_setxattr2;
+ if (xdata)
+ local->rebalance.xdata = dict_ref(xdata);
+
+ /* Phase 2 of migration */
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2(stbuf)) {
+ ret = dht_rebalance_complete_check(this, frame);
+ if (!ret)
+ return 0;
+ }
+
+ /* Phase 1 of migration */
+ if (IS_DHT_MIGRATION_PHASE1(stbuf)) {
+ inode = (local->fd) ? local->fd->inode : local->loc.inode;
+
+ ret = dht_inode_ctx_get_mig_info(this, inode, &subvol1, &subvol2);
+ if (!dht_mig_info_is_invalid(local->cached_subvol, subvol1, subvol2)) {
+ dht_setxattr2(this, subvol2, frame, 0);
+ return 0;
}
- ret = dht_vgetxattr_fill_and_set (local, &dict, this, _gf_true);
- if (ret)
- goto unwind;
+ ret = dht_rebalance_in_progress_check(this, frame);
+ if (!ret)
+ return 0;
+ }
- DHT_STACK_UNWIND (getxattr, frame, 0, 0, dict, xdata);
- goto cleanup;
+out:
- unwind:
- DHT_STACK_UNWIND (getxattr, frame, -1, local->op_errno, NULL, NULL);
- cleanup:
- if (dict)
- dict_unref (dict);
- out:
- return 0;
+ if (local->fop == GF_FOP_SETXATTR) {
+ DHT_STACK_UNWIND(setxattr, frame, op_ret, op_errno, xdata);
+ } else {
+ DHT_STACK_UNWIND(fsetxattr, frame, op_ret, op_errno, xdata);
+ }
+
+ return 0;
}
-int
-dht_vgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
+/* Function is call by dict_foreach_fnmatch if key is match with
+ user.* and set boolean flag to true
+*/
+static int
+dht_is_user_xattr(dict_t *this, char *key, data_t *value, void *data)
{
- dht_local_t *local = NULL;
- int ret = 0;
- dict_t *dict = NULL;
- call_frame_t *prev = NULL;
- gf_boolean_t flag = _gf_true;
+ gf_boolean_t *user_xattr_found = data;
+ *user_xattr_found = _gf_true;
+ return 0;
+}
- local = frame->local;
- prev = cookie;
+/* Common code to wind a (f)(set|remove)xattr call to set xattr on directory
+ */
+static int
+dht_dir_common_set_remove_xattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ fd_t *fd, dict_t *xattr, int flags,
+ dict_t *xdata, int *op_errno)
- if (op_ret < 0) {
- local->op_ret = -1;
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_ERROR,
- "vgetxattr: Subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
- goto unwind;
+{
+ dict_t *xattrop = NULL;
+ int32_t subone[1] = {-1};
+ gf_boolean_t uxattr_key_found = _gf_false;
+ xlator_t *mds_subvol = NULL;
+ xlator_t *travvol = NULL;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
+ int i = 0;
+ int call_cnt = 0;
+ dht_local_t *local = NULL;
+ char gfid_local[GF_UUID_BUF_SIZE] = {0};
+ char **xattrs_to_heal;
+
+ conf = this->private;
+ local = frame->local;
+ call_cnt = conf->subvolume_cnt;
+ local->flags = flags;
+ xattrs_to_heal = get_xattrs_to_heal();
+
+ if (!gf_uuid_is_null(local->gfid)) {
+ gf_uuid_unparse(local->gfid, gfid_local);
+ }
+
+ if ((local->fop == GF_FOP_SETXATTR) || (local->fop == GF_FOP_FSETXATTR)) {
+ /* Check if any user xattr present in xattr
+ */
+ dict_foreach_fnmatch(xattr, "user*", dht_is_user_xattr,
+ &uxattr_key_found);
+
+ /* Check if any custom key xattr present in dict xattr
+ and start index from 1 because user xattr already
+ checked in previous line
+ */
+ for (i = 1; xattrs_to_heal[i]; i++)
+ if (dict_get(xattr, xattrs_to_heal[i]))
+ uxattr_key_found = _gf_true;
+ }
+
+ if ((local->fop == GF_FOP_REMOVEXATTR) ||
+ (local->fop == GF_FOP_FREMOVEXATTR)) {
+ /* Check if any custom key xattr present in local->key
+ */
+ for (i = 0; xattrs_to_heal[i]; i++)
+ if (strstr(local->key, xattrs_to_heal[i]))
+ uxattr_key_found = _gf_true;
+ }
+
+ /* If there is no custom key xattr present or gfid is root
+ or call_cnt is 1 then wind a (f)setxattr call on all subvols
+ */
+ if (!uxattr_key_found || __is_root_gfid(local->gfid) || call_cnt == 1) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ travvol = conf->subvolumes[i];
+ if ((local->fop == GF_FOP_SETXATTR) ||
+ (local->fop == GF_FOP_FSETXATTR)) {
+ if (fd) {
+ STACK_WIND_COOKIE(frame, dht_err_cbk, travvol, travvol,
+ travvol->fops->fsetxattr, fd, xattr,
+ flags, xdata);
+ } else {
+ STACK_WIND_COOKIE(frame, dht_err_cbk, travvol, travvol,
+ travvol->fops->setxattr, loc, xattr,
+ flags, xdata);
+ }
+ }
+
+ if ((local->fop == GF_FOP_REMOVEXATTR) ||
+ (local->fop == GF_FOP_FREMOVEXATTR)) {
+ if (fd) {
+ STACK_WIND_COOKIE(frame, dht_err_cbk, travvol, travvol,
+ travvol->fops->fremovexattr, fd,
+ local->key, local->xattr_req);
+ } else {
+ STACK_WIND_COOKIE(frame, dht_err_cbk, travvol, travvol,
+ travvol->fops->removexattr, loc,
+ local->key, local->xattr_req);
+ }
+ }
}
- ret = dht_vgetxattr_alloc_and_fill (local, xattr, this,
- op_errno);
- if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_NO_MEMORY,
- "Allocation or fill failure");
- goto unwind;
+ return 0;
+ }
+
+ /* Calculate hash subvol based on inode and parent inode
+ */
+ if (fd) {
+ ret = dht_inode_ctx_mdsvol_get(fd->inode, this, &mds_subvol);
+ } else {
+ ret = dht_inode_ctx_mdsvol_get(loc->inode, this, &mds_subvol);
+ }
+ if (ret || !mds_subvol) {
+ if (fd) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ "Failed to get mds subvol for fd %p"
+ "gfid is %s ",
+ fd, gfid_local);
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ "%s: Failed to get mds subvol. (gfid is %s)", loc->path,
+ gfid_local);
+ }
+ (*op_errno) = ENOENT;
+ goto err;
+ }
+
+ local->mds_subvol = mds_subvol;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvolumes[i] == mds_subvol) {
+ if (!conf->subvolume_status[i]) {
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_HASHED_SUBVOL_DOWN,
+ "MDS subvol is down for path "
+ " %s gfid is %s Unable to set xattr ",
+ local->loc.path, gfid_local);
+ (*op_errno) = ENOTCONN;
+ goto err;
+ }
+ }
+ }
+
+ if (uxattr_key_found) {
+ xattrop = dict_new();
+ if (!xattrop) {
+ gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, 0,
+ "dictionary creation failed for path %s "
+ "for gfid is %s ",
+ local->loc.path, gfid_local);
+ (*op_errno) = ENOMEM;
+ goto err;
+ }
+ local->xattr = dict_ref(xattr);
+ /* Subtract current MDS xattr value to -1 , value of MDS
+ xattr represents no. of times xattr modification failed
+ on non MDS subvols.
+ */
+ ret = dht_dict_set_array(xattrop, conf->mds_xattr_key, subone, 1);
+ if (ret != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "dictionary set array failed for path %s "
+ "for gfid is %s ",
+ local->loc.path, gfid_local);
+ if (xattrop)
+ dict_unref(xattrop);
+ (*op_errno) = ret;
+ goto err;
+ }
+ /* Wind a xattrop call to use ref counting approach
+ update mds xattr to -1 before update xattr on
+ hashed subvol and update mds xattr to +1 after update
+ xattr on all non hashed subvol
+ */
+ if (fd) {
+ STACK_WIND(frame, dht_xattrop_mds_cbk, local->mds_subvol,
+ local->mds_subvol->fops->fxattrop, fd,
+ GF_XATTROP_ADD_ARRAY, xattrop, NULL);
+ } else {
+ STACK_WIND(frame, dht_xattrop_mds_cbk, local->mds_subvol,
+ local->mds_subvol->fops->xattrop, loc,
+ GF_XATTROP_ADD_ARRAY, xattrop, NULL);
}
+ if (xattrop)
+ dict_unref(xattrop);
+ }
- flag = (local->layout->cnt > 1) ? _gf_true : _gf_false;
+ return 0;
+err:
+ return -1;
+}
- ret = dht_vgetxattr_fill_and_set (local, &dict, this, flag);
+int
+dht_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xattr,
+ int flags, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ dht_local_t *local = NULL;
+ int op_errno = EINVAL;
+ dht_conf_t *conf = NULL;
+ dht_layout_t *layout = NULL;
+ int ret = -1;
+ int call_cnt = 0;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
+ VALIDATE_OR_GOTO(fd->inode, err);
+ VALIDATE_OR_GOTO(this->private, err);
+
+ conf = this->private;
+
+ if (!conf->defrag)
+ GF_IF_INTERNAL_XATTR_GOTO(conf->wild_xattr_name, xattr, op_errno, err);
+
+ local = dht_local_init(frame, NULL, fd, GF_FOP_FSETXATTR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ layout = local->layout;
+ if (!layout) {
+ gf_msg_debug(this->name, 0, "no layout for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local->xattr_req = xdata ? dict_ref(xdata) : dict_new();
+ local->call_cnt = call_cnt = layout->cnt;
+
+ if (IA_ISDIR(fd->inode->ia_type)) {
+ local->hashed_subvol = NULL;
+ ret = dht_dir_common_set_remove_xattr(frame, this, NULL, fd, xattr,
+ flags, xdata, &op_errno);
if (ret)
- goto unwind;
+ goto err;
+ } else {
+ local->call_cnt = 1;
+ local->rebalance.xattr = dict_ref(xattr);
+ local->rebalance.flags = flags;
+
+ ret = dict_set_int8(local->xattr_req, DHT_IATT_IN_XDATA_KEY, 1);
+ if (ret) {
+ gf_msg_debug(this->name, 0,
+ "Failed to set dictionary key %s for fd=%p",
+ DHT_IATT_IN_XDATA_KEY, fd);
+ }
- DHT_STACK_UNWIND (getxattr, frame, 0, 0, dict, xdata);
- goto cleanup;
+ STACK_WIND_COOKIE(frame, dht_file_setxattr_cbk, subvol, subvol,
+ subvol->fops->fsetxattr, fd, xattr, flags,
+ local->xattr_req);
+ }
+ return 0;
- unwind:
- DHT_STACK_UNWIND (getxattr, frame, -1, local->op_errno,
- NULL, NULL);
- cleanup:
- if (dict)
- dict_unref (dict);
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(fsetxattr, frame, -1, op_errno, NULL);
- return 0;
+ return 0;
}
-int
-dht_linkinfo_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xattr,
- dict_t *xdata)
+static int
+dht_checking_pathinfo_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr,
+ dict_t *xdata)
{
- int ret = 0;
- char *value = NULL;
-
- if (op_ret != -1) {
- ret = dict_get_str (xattr, GF_XATTR_PATHINFO_KEY, &value);
- if (!ret) {
- ret = dict_set_str (xattr, GF_XATTR_LINKINFO_KEY, value);
- if (!ret)
- gf_msg_trace (this->name, 0,
- "failed to set linkinfo");
- }
+ int i = -1;
+ int ret = -1;
+ char *value = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ xlator_t *prev = NULL;
+ int this_call_cnt = 0;
+
+ local = frame->local;
+ prev = cookie;
+ conf = this->private;
+
+ if (op_ret == -1)
+ goto out;
+
+ ret = dict_get_str(xattr, GF_XATTR_PATHINFO_KEY, &value);
+ if (ret)
+ goto out;
+
+ if (!strcmp(value, local->key)) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvolumes[i] == prev)
+ conf->decommissioned_bricks[i] = prev;
}
+ }
+
+out:
+ this_call_cnt = dht_frame_return(frame);
+ if (is_last_call(this_call_cnt)) {
+ DHT_STACK_UNWIND(setxattr, frame, local->op_ret, ENOTSUP, NULL);
+ }
+ return 0;
+}
+
+static int
+dht_nuke_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT(setxattr, frame, op_ret, op_errno, NULL);
+ return 0;
+}
- DHT_STACK_UNWIND (getxattr, frame, op_ret, op_errno, xattr, xdata);
+static int
+dht_nuke_dir(call_frame_t *frame, xlator_t *this, loc_t *loc, data_t *tmp)
+{
+ if (!IA_ISDIR(loc->inode->ia_type)) {
+ DHT_STACK_UNWIND(setxattr, frame, -1, ENOTSUP, NULL);
+ return 0;
+ }
+ /* Setxattr didn't need the parent, but rmdir does. */
+ loc->parent = inode_parent(loc->inode, NULL, NULL);
+ if (!loc->parent) {
+ DHT_STACK_UNWIND(setxattr, frame, -1, ENOENT, NULL);
return 0;
+ }
+ gf_uuid_copy(loc->pargfid, loc->parent->gfid);
+
+ if (!loc->name && loc->path) {
+ loc->name = strrchr(loc->path, '/');
+ if (loc->name) {
+ ++(loc->name);
+ }
+ }
+
+ /*
+ * We do this instead of calling dht_rmdir_do directly for two reasons.
+ * The first is that we want to reuse all of the initialization that
+ * dht_rmdir does, so if it ever changes we'll just follow along. The
+ * second (i.e. why we don't use STACK_WIND_TAIL) is so that we don't
+ * obscure the fact that we came in via this path instead of a genuine
+ * rmdir. That makes debugging just a tiny bit easier.
+ */
+ STACK_WIND(frame, dht_nuke_dir_cbk, this, this->fops->rmdir, loc, 1, NULL);
+
+ return 0;
}
int
-dht_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
+dht_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr,
+ int flags, dict_t *xdata)
{
- int this_call_cnt = 0;
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
+ xlator_t *subvol = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ dht_methods_t *methods = NULL;
+ dht_layout_t *layout = NULL;
+ int i = 0;
+ int op_errno = EINVAL;
+ int ret = -1;
+ data_t *tmp = NULL;
+ uint32_t dir_spread = 0;
+ char value[4096] = {
+ 0,
+ };
+ gf_dht_migrate_data_type_t forced_rebalance = GF_DHT_MIGRATE_DATA;
+ int call_cnt = 0;
+ uint32_t new_hash = 0;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+ VALIDATE_OR_GOTO(loc->inode, err);
+
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, conf, err);
+
+ methods = &(conf->methods);
+
+ /* Rebalance daemon is allowed to set internal keys */
+ if (!conf->defrag)
+ GF_IF_INTERNAL_XATTR_GOTO(conf->wild_xattr_name, xattr, op_errno, err);
+
+ local = dht_local_init(frame, loc, NULL, GF_FOP_SETXATTR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no cached subvolume for path=%s",
+ loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ layout = local->layout;
+ if (!layout) {
+ gf_msg_debug(this->name, 0, "no layout for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local->call_cnt = call_cnt = layout->cnt;
+ tmp = dict_get(xattr, conf->mds_xattr_key);
+ if (tmp) {
+ op_errno = ENOTSUP;
+ goto err;
+ }
+
+ tmp = dict_get(xattr, GF_XATTR_FILE_MIGRATE_KEY);
+ if (tmp) {
+ if (IA_ISDIR(loc->inode->ia_type)) {
+ op_errno = ENOTSUP;
+ goto err;
+ }
+
+ /* TODO: need to interpret the 'value' for more meaning
+ (ie, 'target' subvolume given there, etc) */
+ memcpy(value, tmp->data, tmp->len);
+ if (strcmp(value, "force") == 0)
+ forced_rebalance = GF_DHT_MIGRATE_DATA_EVEN_IF_LINK_EXISTS;
+
+ if (conf->decommission_in_progress)
+ forced_rebalance = GF_DHT_MIGRATE_HARDLINK;
+
+ if (!loc->path) {
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (!local->loc.name)
+ local->loc.name = strrchr(local->loc.path, '/') + 1;
+
+ if (!local->loc.parent)
+ local->loc.parent = inode_parent(local->loc.inode, NULL, NULL);
+
+ if ((!local->loc.name) || (!local->loc.parent)) {
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (gf_uuid_is_null(local->loc.pargfid))
+ gf_uuid_copy(local->loc.pargfid, local->loc.parent->gfid);
+
+ methods->migration_get_dst_subvol(this, local);
+
+ if (!local->rebalance.target_node) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ "Failed to get hashed subvol for %s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local->rebalance.from_subvol = local->cached_subvol;
+
+ if (local->rebalance.target_node == local->rebalance.from_subvol) {
+ op_errno = EEXIST;
+ goto err;
+ }
+ if (local->rebalance.target_node) {
+ local->flags = forced_rebalance;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (frame->local, out);
- VALIDATE_OR_GOTO (this->private, out);
+ frame->root->pid = GF_CLIENT_PID_DEFRAG;
- conf = this->private;
- local = frame->local;
+ ret = dht_start_rebalance_task(this, frame);
+ if (!ret)
+ return 0;
- this_call_cnt = dht_frame_return (frame);
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_REBALANCE_START_FAILED,
+ "%s: failed to create a new rebalance synctask", loc->path);
+ }
+ op_errno = EINVAL;
+ goto err;
+ }
- if (!xattr || (op_ret == -1)) {
- local->op_ret = op_ret;
- goto out;
+ tmp = dict_get(xattr, "decommission-brick");
+ if (tmp) {
+ /* This operation should happen only on '/' */
+ if (!__is_root_gfid(loc->inode->gfid)) {
+ op_errno = ENOTSUP;
+ goto err;
}
- if (dict_get (xattr, conf->xattr_name)) {
- dict_del (xattr, conf->xattr_name);
+ memcpy(value, tmp->data, min(tmp->len, 4095));
+ local->key = gf_strdup(value);
+ local->call_cnt = conf->subvolume_cnt;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ /* Get the pathinfo, and then compare */
+ STACK_WIND_COOKIE(frame, dht_checking_pathinfo_cbk,
+ conf->subvolumes[i], conf->subvolumes[i],
+ conf->subvolumes[i]->fops->getxattr, loc,
+ GF_XATTR_PATHINFO_KEY, NULL);
}
+ return 0;
+ }
- if (frame->root->pid >= 0 ) {
- GF_REMOVE_INTERNAL_XATTR("trusted.glusterfs.quota*", xattr);
- GF_REMOVE_INTERNAL_XATTR("trusted.pgfid*", xattr);
+ tmp = dict_get(xattr, GF_XATTR_FIX_LAYOUT_KEY);
+ if (tmp) {
+ ret = dict_get_uint32(xattr, "new-commit-hash", &new_hash);
+ if (ret == 0) {
+ gf_msg_debug(this->name, 0,
+ "updating commit hash for %s from %u to %u",
+ uuid_utoa(loc->gfid), layout->commit_hash, new_hash);
+ layout->commit_hash = new_hash;
+
+ ret = dht_update_commit_hash_for_layout(frame);
+ if (ret) {
+ op_errno = ENOTCONN;
+ goto err;
+ }
+ return ret;
}
- local->op_ret = 0;
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_FIX_LAYOUT_INFO,
+ "fixing the layout of %s", loc->path);
- if (!local->xattr) {
- local->xattr = dict_copy_with_ref (xattr, NULL);
- } else {
- dht_aggregate_xattr (local->xattr, xattr);
- }
-out:
- if (is_last_call (this_call_cnt)) {
- DHT_STACK_UNWIND (getxattr, frame, local->op_ret, op_errno,
- local->xattr, NULL);
+ ret = dht_fix_directory_layout(frame, dht_fix_layout_setxattr_cbk,
+ layout);
+ if (ret) {
+ op_errno = ENOTCONN;
+ goto err;
}
- return 0;
+ return ret;
+ }
+
+ tmp = dict_get(xattr, "distribute.directory-spread-count");
+ if (tmp) {
+ /* Setxattr value is packed as 'binary', not string */
+ memcpy(value, tmp->data, min(tmp->len, 4095));
+ ret = gf_string2uint32(value, &dir_spread);
+ if (!ret && ((dir_spread <= conf->subvolume_cnt) && (dir_spread > 0))) {
+ layout->spread_cnt = dir_spread;
+
+ ret = dht_fix_directory_layout(frame, dht_common_setxattr_cbk,
+ layout);
+ if (ret) {
+ op_errno = ENOTCONN;
+ goto err;
+ }
+ return ret;
+ }
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_OPERATION_NOT_SUP,
+ "wrong 'directory-spread-count' value (%s)", value);
+ op_errno = ENOTSUP;
+ goto err;
+ }
+
+ tmp = dict_get(xattr, "glusterfs.dht.nuke");
+ if (tmp) {
+ return dht_nuke_dir(frame, this, loc, tmp);
+ }
+ local->xattr_req = xdata ? dict_ref(xdata) : dict_new();
+
+ if (IA_ISDIR(loc->inode->ia_type)) {
+ local->hashed_subvol = NULL;
+ ret = dht_dir_common_set_remove_xattr(frame, this, loc, NULL, xattr,
+ flags, xdata, &op_errno);
+ if (ret)
+ goto err;
+ } else {
+ local->rebalance.xattr = dict_ref(xattr);
+ local->rebalance.flags = flags;
+ local->call_cnt = 1;
+
+ ret = dict_set_int8(local->xattr_req, DHT_IATT_IN_XDATA_KEY, 1);
+
+ STACK_WIND_COOKIE(frame, dht_file_setxattr_cbk, subvol, subvol,
+ subvol->fops->setxattr, loc, xattr, flags,
+ local->xattr_req);
+ }
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(setxattr, frame, -1, op_errno, NULL);
+
+ return 0;
}
-int32_t
-dht_getxattr_unwind (call_frame_t *frame,
- int op_ret, int op_errno, dict_t *dict, dict_t *xdata)
+static int
+dht_removexattr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
- DHT_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, xdata);
+ dht_local_t *local = NULL;
+ int op_errno = EINVAL;
+
+ if (!frame || !frame->local)
+ goto err;
+
+ local = frame->local;
+ op_errno = local->op_errno;
+
+ local->call_cnt = 2; /* This is the second attempt */
+
+ if (we_are_not_migrating(ret)) {
+ /* This dht xlator is not migrating the file. Unwind and
+ * pass on the original mode bits so the higher DHT layer
+ * can handle this.
+ */
+ DHT_STACK_UNWIND(removexattr, frame, local->op_ret, local->op_errno,
+ local->rebalance.xdata);
return 0;
-}
+ }
+
+ if (subvol == NULL)
+ goto err;
+
+ if (local->fop == GF_FOP_REMOVEXATTR) {
+ STACK_WIND_COOKIE(frame, dht_file_removexattr_cbk, subvol, subvol,
+ subvol->fops->removexattr, &local->loc, local->key,
+ local->xattr_req);
+ } else {
+ STACK_WIND_COOKIE(frame, dht_file_removexattr_cbk, subvol, subvol,
+ subvol->fops->fremovexattr, local->fd, local->key,
+ local->xattr_req);
+ }
+
+ return 0;
+err:
+ DHT_STACK_UNWIND(removexattr, frame, -1, op_errno, NULL);
+ return 0;
+}
int
-dht_getxattr_get_real_filename_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int op_ret, int op_errno,
- dict_t *xattr, dict_t *xdata)
+dht_file_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
{
- int this_call_cnt = 0;
- dht_local_t *local = NULL;
+ int ret = -1;
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
+ struct iatt *stbuf = NULL;
+ inode_t *inode = NULL;
+ xlator_t *subvol1 = NULL, *subvol2 = NULL;
+ local = frame->local;
+ prev = cookie;
- local = frame->local;
+ local->op_errno = op_errno;
- LOCK (&frame->lock);
- {
- if (local->op_errno == ENODATA ||
- local->op_errno == EOPNOTSUPP) {
- /* Nothing to do here, we have already found
- * a subvol which does not have the get_real_filename
- * optimization. If condition is for simple logic.
- */
- goto unlock;
- }
+ if ((local->fop == GF_FOP_FREMOVEXATTR) &&
+ dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto out;
+ return 0;
+ }
- if (op_ret == -1) {
-
- if (op_errno == ENODATA || op_errno == EOPNOTSUPP) {
- /* This subvol does not have the optimization.
- * Better let the user know we don't support it.
- * Remove previous results if any.
- */
-
- if (local->xattr) {
- dict_unref (local->xattr);
- local->xattr = NULL;
- }
-
- if (local->xattr_req) {
- dict_unref (local->xattr_req);
- local->xattr_req = NULL;
- }
-
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_WARNING, "At least "
- "one of the bricks does not support "
- "this operation. Please upgrade all "
- "bricks.");
- goto unlock;
- }
+ if ((op_ret == -1) && !dht_inode_missing(op_errno)) {
+ gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
+ prev->name);
+ goto out;
+ }
- if (op_errno == ENOENT) {
- /* Do nothing, our defaults are set to this.
- */
- goto unlock;
- }
+ if (local->call_cnt != 1)
+ goto out;
- /* This is a place holder for every other error
- * case. I am not sure of how to interpret
- * ENOTCONN etc. As of now, choosing to ignore
- * down subvol and return a good result(if any)
- * from other subvol.
- */
- gf_log (this->name, GF_LOG_WARNING,
- "Failed to get real filename. "
- "error:%s", strerror (op_errno));
- goto unlock;
+ ret = dict_get_bin(xdata, DHT_IATT_IN_XDATA_KEY, (void **)&stbuf);
- }
+ if ((!op_ret) && !stbuf) {
+ goto out;
+ }
+ local->op_ret = 0;
- /* This subvol has the required file.
- * There could be other subvols which have returned
- * success already, choosing to return the latest good
- * result.
- */
- if (local->xattr)
- dict_unref (local->xattr);
- local->xattr = dict_ref (xattr);
+ local->rebalance.target_op_fn = dht_removexattr2;
+ if (xdata)
+ local->rebalance.xdata = dict_ref(xdata);
- if (local->xattr_req) {
- dict_unref (local->xattr_req);
- local->xattr_req = NULL;
- }
- if (xdata)
- local->xattr_req = dict_ref (xdata);
+ /* Phase 2 of migration */
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2(stbuf)) {
+ ret = dht_rebalance_complete_check(this, frame);
+ if (!ret)
+ return 0;
+ }
- local->op_ret = op_ret;
- local->op_errno = 0;
- gf_log (this->name, GF_LOG_DEBUG, "Found a matching "
- "file.");
- }
-unlock:
- UNLOCK (&frame->lock);
+ /* Phase 1 of migration */
+ if (IS_DHT_MIGRATION_PHASE1(stbuf)) {
+ inode = (local->fd) ? local->fd->inode : local->loc.inode;
+ ret = dht_inode_ctx_get_mig_info(this, inode, &subvol1, &subvol2);
+ if (!dht_mig_info_is_invalid(local->cached_subvol, subvol1, subvol2)) {
+ dht_removexattr2(this, subvol2, frame, 0);
+ return 0;
+ }
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- DHT_STACK_UNWIND (getxattr, frame, local->op_ret,
- local->op_errno, local->xattr,
- local->xattr_req);
- }
+ ret = dht_rebalance_in_progress_check(this, frame);
+ if (!ret)
+ return 0;
+ }
- return 0;
+out:
+ if (local->fop == GF_FOP_REMOVEXATTR) {
+ DHT_STACK_UNWIND(removexattr, frame, op_ret, op_errno, xdata);
+ } else {
+ DHT_STACK_UNWIND(fremovexattr, frame, op_ret, op_errno, xdata);
+ }
+ return 0;
}
-
int
-dht_getxattr_get_real_filename (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *key, dict_t *xdata)
+dht_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *key, dict_t *xdata)
{
- dht_local_t *local = NULL;
- int i = 0;
- dht_layout_t *layout = NULL;
- int cnt = 0;
- xlator_t *subvol = NULL;
-
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int call_cnt = 0;
+ dht_conf_t *conf = NULL;
+ int ret = 0;
+
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(this->private, err);
+
+ conf = this->private;
+
+ GF_IF_NATIVE_XATTR_GOTO(conf->wild_xattr_name, key, op_errno, err);
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(loc, err);
+ VALIDATE_OR_GOTO(loc->inode, err);
+
+ local = dht_local_init(frame, loc, NULL, GF_FOP_REMOVEXATTR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no cached subvolume for path=%s",
+ loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ layout = local->layout;
+ if (!local->layout) {
+ gf_msg_debug(this->name, 0, "no layout for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
+
+ local->call_cnt = call_cnt = layout->cnt;
+ local->key = gf_strdup(key);
+
+ if (key && (strncmp(key, conf->mds_xattr_key, strlen(key)) == 0)) {
+ op_errno = ENOTSUP;
+ goto err;
+ }
+
+ if (IA_ISDIR(loc->inode->ia_type)) {
+ local->hashed_subvol = NULL;
+ ret = dht_dir_common_set_remove_xattr(frame, this, loc, NULL, NULL, 0,
+ local->xattr_req, &op_errno);
+ if (ret)
+ goto err;
- local = frame->local;
- layout = local->layout;
+ } else {
+ local->call_cnt = 1;
+ ret = dict_set_int8(local->xattr_req, DHT_IATT_IN_XDATA_KEY, 1);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_DICT_SET_FAILED,
+ "Failed to "
+ "set dictionary key %s for %s",
+ DHT_IATT_IN_XDATA_KEY, loc->path);
+ }
- cnt = local->call_cnt = layout->cnt;
+ STACK_WIND_COOKIE(frame, dht_file_removexattr_cbk, subvol, subvol,
+ subvol->fops->removexattr, loc, key,
+ local->xattr_req);
+ }
- local->op_ret = -1;
- local->op_errno = ENOENT;
+ return 0;
- for (i = 0; i < cnt; i++) {
- subvol = layout->list[i].xlator;
- STACK_WIND (frame, dht_getxattr_get_real_filename_cbk,
- subvol, subvol->fops->getxattr,
- loc, key, xdata);
- }
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(removexattr, frame, -1, op_errno, NULL);
- return 0;
+ return 0;
}
int
-dht_marker_populate_args (call_frame_t *frame, int type, int *gauge,
- xlator_t **subvols)
+dht_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *key,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- int i = 0;
- dht_layout_t *layout = NULL;
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int call_cnt = 0;
+ dht_conf_t *conf = 0;
+ int ret = 0;
+
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(this->private, err);
+
+ conf = this->private;
+
+ GF_IF_NATIVE_XATTR_GOTO(conf->wild_xattr_name, key, op_errno, err);
+
+ VALIDATE_OR_GOTO(frame, err);
+
+ local = dht_local_init(frame, NULL, fd, GF_FOP_FREMOVEXATTR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no cached subvolume for inode=%s",
+ uuid_utoa(fd->inode->gfid));
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ layout = local->layout;
+ if (!local->layout) {
+ gf_msg_debug(this->name, 0, "no layout for inode=%s",
+ uuid_utoa(fd->inode->gfid));
+ op_errno = EINVAL;
+ goto err;
+ }
+ local->xattr_req = xdata ? dict_ref(xdata) : dict_new();
+
+ local->call_cnt = call_cnt = layout->cnt;
+ local->key = gf_strdup(key);
+
+ if (IA_ISDIR(fd->inode->ia_type)) {
+ local->hashed_subvol = NULL;
+ ret = dht_dir_common_set_remove_xattr(frame, this, NULL, fd, NULL, 0,
+ local->xattr_req, &op_errno);
+ if (ret)
+ goto err;
- local = frame->local;
- layout = local->layout;
+ } else {
+ local->call_cnt = 1;
+ ret = dict_set_int8(local->xattr_req, DHT_IATT_IN_XDATA_KEY, 1);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_DICT_SET_FAILED,
+ "Failed to "
+ "set dictionary key %s for fd=%p",
+ DHT_IATT_IN_XDATA_KEY, fd);
+ }
+
+ STACK_WIND_COOKIE(frame, dht_file_removexattr_cbk, subvol, subvol,
+ subvol->fops->fremovexattr, fd, key,
+ local->xattr_req);
+ }
- for (i = 0; i < layout->cnt; i++)
- subvols[i] = layout->list[i].xlator;
+ return 0;
- return layout->cnt;
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(fremovexattr, frame, -1, op_errno, NULL);
+
+ return 0;
}
int
-dht_getxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *key, dict_t *xdata)
-#define DHT_IS_DIR(layout) (layout->cnt > 1)
+dht_fd_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, fd_t *fd, dict_t *xdata)
{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ xlator_t *prev = NULL;
- xlator_t *subvol = NULL;
- xlator_t *hashed_subvol = NULL;
- xlator_t *cached_subvol = NULL;
- dht_conf_t *conf = NULL;
- dht_local_t *local = NULL;
- dht_layout_t *layout = NULL;
- int op_errno = -1;
- int i = 0;
- int cnt = 0;
+ local = frame->local;
+ prev = cookie;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (this->private, err);
+ LOCK(&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ UNLOCK(&frame->lock);
+ gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
+ prev->name);
+ goto post_unlock;
+ }
- conf = this->private;
+ local->op_ret = 0;
+ }
+ UNLOCK(&frame->lock);
+post_unlock:
+ this_call_cnt = dht_frame_return(frame);
+ if (is_last_call(this_call_cnt))
+ DHT_STACK_UNWIND(open, frame, local->op_ret, local->op_errno, local->fd,
+ NULL);
+
+ return 0;
+}
- local = dht_local_init (frame, loc, NULL, GF_FOP_GETXATTR);
- if (!local) {
- op_errno = ENOMEM;
+/*
+ * dht_normalize_stats -
+ */
+static void
+dht_normalize_stats(struct statvfs *buf, unsigned long bsize,
+ unsigned long frsize)
+{
+ double factor = 0;
+
+ if (buf->f_bsize != bsize) {
+ buf->f_bsize = bsize;
+ }
+
+ if (buf->f_frsize != frsize) {
+ factor = ((double)buf->f_frsize) / frsize;
+ buf->f_frsize = frsize;
+ buf->f_blocks = (fsblkcnt_t)(factor * buf->f_blocks);
+ buf->f_bfree = (fsblkcnt_t)(factor * buf->f_bfree);
+ buf->f_bavail = (fsblkcnt_t)(factor * buf->f_bavail);
+ }
+}
- goto err;
+static int
+dht_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct statvfs *statvfs, dict_t *xdata)
+{
+ gf_boolean_t event = _gf_false;
+ qdstatfs_action_t action = qdstatfs_action_OFF;
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ int bsize = 0;
+ int frsize = 0;
+ GF_UNUSED int ret = 0;
+ unsigned long new_usage = 0;
+ unsigned long cur_usage = 0;
+
+ local = frame->local;
+ GF_ASSERT(local);
+
+ if (xdata)
+ ret = dict_get_int8(xdata, "quota-deem-statfs", (int8_t *)&event);
+
+ LOCK(&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ goto unlock;
}
-
- layout = local->layout;
- if (!layout) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_LAYOUT_NULL,
- "Layout is NULL");
- op_errno = ENOENT;
- goto err;
+ if (!statvfs) {
+ op_errno = EINVAL;
+ local->op_ret = -1;
+ goto unlock;
}
+ local->op_ret = 0;
- if (key) {
- local->key = gf_strdup (key);
- if (!local->key) {
- op_errno = ENOMEM;
- goto err;
- }
+ if (local->quota_deem_statfs) {
+ if (event == _gf_true) {
+ action = qdstatfs_action_COMPARE;
+ } else {
+ action = qdstatfs_action_NEGLECT;
+ }
+ } else {
+ if (event == _gf_true) {
+ action = qdstatfs_action_REPLACE;
+ local->quota_deem_statfs = _gf_true;
+ }
}
- if (key &&
- (strncmp (key, GF_XATTR_GET_REAL_FILENAME_KEY,
- strlen (GF_XATTR_GET_REAL_FILENAME_KEY)) == 0)
- && DHT_IS_DIR(layout)) {
- dht_getxattr_get_real_filename (frame, this, loc, key, xdata);
- return 0;
- }
+ if (local->quota_deem_statfs) {
+ switch (action) {
+ case qdstatfs_action_NEGLECT:
+ goto unlock;
- /* for file use cached subvolume (obviously!): see if {}
- * below
- * for directory:
- * wind to all subvolumes and exclude subvolumes which
- * return ENOTCONN (in callback)
- *
- * NOTE: Don't trust inode here, as that may not be valid
- * (until inode_link() happens)
- */
- if (key && DHT_IS_DIR(layout) &&
- (XATTR_IS_PATHINFO (key)
- || (strcmp (key, GF_XATTR_NODE_UUID_KEY) == 0))) {
- (void) strncpy (local->xsel, key, 256);
- cnt = local->call_cnt = layout->cnt;
- for (i = 0; i < cnt; i++) {
- subvol = layout->list[i].xlator;
- STACK_WIND (frame, dht_vgetxattr_dir_cbk,
- subvol, subvol->fops->getxattr,
- loc, key, xdata);
- }
- return 0;
- }
+ case qdstatfs_action_REPLACE:
+ local->statvfs = *statvfs;
+ goto unlock;
- /* node-uuid or pathinfo for files */
- if (key && ((strcmp (key, GF_XATTR_NODE_UUID_KEY) == 0)
- || XATTR_IS_PATHINFO (key))) {
- cached_subvol = local->cached_subvol;
- (void) strncpy (local->xsel, key, 256);
+ case qdstatfs_action_COMPARE:
+ new_usage = statvfs->f_blocks - statvfs->f_bfree;
+ cur_usage = local->statvfs.f_blocks -
+ local->statvfs.f_bfree;
- local->call_cnt = 1;
- STACK_WIND (frame, dht_vgetxattr_cbk, cached_subvol,
- cached_subvol->fops->getxattr, loc, key, xdata);
+ /* Take the max of the usage from subvols */
+ if (new_usage >= cur_usage)
+ local->statvfs = *statvfs;
+ goto unlock;
- return 0;
+ default:
+ break;
+ }
}
- if (key && (strcmp (key, GF_XATTR_LINKINFO_KEY) == 0)) {
+ if (local->statvfs.f_bsize != 0) {
+ bsize = max(local->statvfs.f_bsize, statvfs->f_bsize);
+ frsize = max(local->statvfs.f_frsize, statvfs->f_frsize);
+ dht_normalize_stats(&local->statvfs, bsize, frsize);
+ dht_normalize_stats(statvfs, bsize, frsize);
+ } else {
+ local->statvfs.f_bsize = statvfs->f_bsize;
+ local->statvfs.f_frsize = statvfs->f_frsize;
+ }
+
+ local->statvfs.f_blocks += statvfs->f_blocks;
+ local->statvfs.f_bfree += statvfs->f_bfree;
+ local->statvfs.f_bavail += statvfs->f_bavail;
+ local->statvfs.f_files += statvfs->f_files;
+ local->statvfs.f_ffree += statvfs->f_ffree;
+ local->statvfs.f_favail += statvfs->f_favail;
+ local->statvfs.f_fsid = statvfs->f_fsid;
+ local->statvfs.f_flag = statvfs->f_flag;
+ local->statvfs.f_namemax = statvfs->f_namemax;
+ }
+unlock:
+ UNLOCK(&frame->lock);
- hashed_subvol = dht_subvol_get_hashed (this, loc);
- if (!hashed_subvol) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_HASHED_SUBVOL_GET_FAILED,
- "Failed to get hashed subvol for %s",
- loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ this_call_cnt = dht_frame_return(frame);
+ if (is_last_call(this_call_cnt))
+ DHT_STACK_UNWIND(statfs, frame, local->op_ret, local->op_errno,
+ &local->statvfs, xdata);
- cached_subvol = dht_subvol_get_cached (this, loc->inode);
- if (!cached_subvol) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_CACHED_SUBVOL_GET_FAILED,
- "Failed to get cached subvol for %s",
- loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ return 0;
+}
- if (hashed_subvol == cached_subvol) {
- op_errno = ENODATA;
- goto err;
- }
+int
+dht_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int op_errno = -1;
+ int i = -1;
+ inode_t *inode = NULL;
+ inode_table_t *itable = NULL;
+ static uuid_t root_gfid = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
+ loc_t newloc = {
+ 0,
+ };
- STACK_WIND (frame, dht_linkinfo_getxattr_cbk, hashed_subvol,
- hashed_subvol->fops->getxattr, loc,
- GF_XATTR_PATHINFO_KEY, xdata);
- return 0;
- }
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+ VALIDATE_OR_GOTO(this->private, err);
- if (key && (!strcmp (GF_XATTR_QUOTA_LIMIT_LIST, key) ||
- !strcmp (GF_XATTR_QUOTA_LIMIT_LIST_OBJECT, key))) {
- /* quota hardlimit and aggregated size of a directory is stored
- * in inode contexts of each brick. Hence its good enough that
- * we send getxattr for this key to any brick.
- */
- local->call_cnt = 1;
- subvol = dht_first_up_subvol (this);
- STACK_WIND (frame, dht_getxattr_cbk, subvol,
- subvol->fops->getxattr, loc, key, xdata);
- return 0;
- }
+ conf = this->private;
- if (cluster_handle_marker_getxattr (frame, loc, key, conf->vol_uuid,
- dht_getxattr_unwind,
- dht_marker_populate_args) == 0)
- return 0;
+ local = dht_local_init(frame, NULL, NULL, GF_FOP_STATFS);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- if (DHT_IS_DIR(layout)) {
- cnt = local->call_cnt = layout->cnt;
- } else {
- cnt = local->call_cnt = 1;
+ if (loc->inode && !IA_ISDIR(loc->inode->ia_type)) {
+ itable = loc->inode->table;
+ if (!itable) {
+ op_errno = EINVAL;
+ goto err;
}
- for (i = 0; i < cnt; i++) {
- subvol = layout->list[i].xlator;
- STACK_WIND (frame, dht_getxattr_cbk,
- subvol, subvol->fops->getxattr,
- loc, key, xdata);
+ loc = &local->loc2;
+
+ inode = inode_find(itable, root_gfid);
+ if (!inode) {
+ op_errno = EINVAL;
+ goto err;
}
- return 0;
+
+ dht_build_root_loc(inode, &newloc);
+ loc = &newloc;
+ }
+
+ local->call_cnt = conf->subvolume_cnt;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ STACK_WIND(frame, dht_statfs_cbk, conf->subvolumes[i],
+ conf->subvolumes[i]->fops->statfs, loc, xdata);
+ }
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(statfs, frame, -1, op_errno, NULL, NULL);
- return 0;
+ return 0;
}
-#undef DHT_IS_DIR
int
-dht_fgetxattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, const char *key, dict_t *xdata)
-{
- xlator_t *subvol = NULL;
- dht_local_t *local = NULL;
- dht_layout_t *layout = NULL;
- int op_errno = -1;
- int i = 0;
- int cnt = 0;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
- VALIDATE_OR_GOTO (fd->inode, err);
- VALIDATE_OR_GOTO (this->private, err);
-
- local = dht_local_init (frame, NULL, fd, GF_FOP_FGETXATTR);
- if (!local) {
- op_errno = ENOMEM;
-
- goto err;
- }
-
- layout = local->layout;
- if (!layout) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_LAYOUT_NULL,
- "Layout is NULL");
- op_errno = ENOENT;
- goto err;
+dht_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int op_errno = -1;
+ int i = -1;
+ int ret = 0;
+ gf_boolean_t new_xdata = _gf_false;
+ xlator_t **subvolumes = NULL;
+ int call_count = 0;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
+ VALIDATE_OR_GOTO(this->private, err);
+
+ conf = this->private;
+
+ local = dht_local_init(frame, loc, fd, GF_FOP_OPENDIR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ local->first_up_subvol = dht_first_up_subvol(this);
+
+ if (!xdata) {
+ xdata = dict_new();
+ if (!xdata) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ new_xdata = _gf_true;
+ }
+
+ ret = dict_set_uint32(xdata, conf->link_xattr_name, 256);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value : key = %s",
+ conf->link_xattr_name);
+
+ /* dht_readdirp will wind to all subvols so open has to be sent to
+ * all subvols whether or not conf->local_subvols is set */
+
+ call_count = local->call_cnt = conf->subvolume_cnt;
+ subvolumes = conf->subvolumes;
+
+ /* In case of parallel-readdir, the readdir-ahead will be loaded
+ * below dht, in this case, if we want to enable or disable SKIP_DIRs
+ * it has to be done in opendir, so that prefetching logic in
+ * readdir-ahead, honors it */
+ for (i = 0; i < call_count; i++) {
+ if (conf->readdir_optimize == _gf_true) {
+ if (subvolumes[i] != local->first_up_subvol) {
+ ret = dict_set_int32(xdata, GF_READDIR_SKIP_DIRS, 1);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary"
+ " value :key = %s, ret:%d",
+ GF_READDIR_SKIP_DIRS, ret);
+ }
}
- if (key) {
- local->key = gf_strdup (key);
- if (!local->key) {
- op_errno = ENOMEM;
- goto err;
- }
- }
+ STACK_WIND_COOKIE(frame, dht_fd_cbk, subvolumes[i], subvolumes[i],
+ subvolumes[i]->fops->opendir, loc, fd, xdata);
+ dict_del(xdata, GF_READDIR_SKIP_DIRS);
+ }
- if ((fd->inode->ia_type == IA_IFDIR)
- && key
- && (strncmp (key, GF_XATTR_LOCKINFO_KEY,
- strlen (GF_XATTR_LOCKINFO_KEY)) != 0)) {
- cnt = local->call_cnt = layout->cnt;
- } else {
- cnt = local->call_cnt = 1;
- }
+ if (new_xdata)
+ dict_unref(xdata);
- for (i = 0; i < cnt; i++) {
- subvol = layout->list[i].xlator;
- STACK_WIND (frame, dht_getxattr_cbk,
- subvol, subvol->fops->fgetxattr,
- fd, key, NULL);
- }
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (fgetxattr, frame, -1, op_errno, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(opendir, frame, -1, op_errno, NULL, NULL);
- return 0;
+ return 0;
}
-int
-dht_fsetxattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, dict_t *xattr, int flags, dict_t *xdata)
+/* dht_readdirp_cbk creates a new dentry and dentry->inode is not assigned.
+ This functions assigns an inode if all of the following conditions are
+ true:
+
+ * DHT has only one child. In this case the entire layout is present on
+ this single child and hence we can set complete layout in inode.
+ * backend has complete layout and there are no anomalies in it and from
+ this information layout can be constructed and set in inode.
+*/
+
+static void
+dht_populate_inode_for_dentry(xlator_t *this, xlator_t *subvol,
+ gf_dirent_t *entry, gf_dirent_t *orig_entry)
{
- xlator_t *subvol = NULL;
- dht_local_t *local = NULL;
- int op_errno = EINVAL;
- dht_conf_t *conf = NULL;
+ dht_layout_t *layout = NULL;
+ int ret = 0;
+ loc_t loc = {
+ 0,
+ };
+
+ if (gf_uuid_is_null(orig_entry->d_stat.ia_gfid)) {
+ /* this skips the '..' entry for the root of the volume */
+ return;
+ }
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
- VALIDATE_OR_GOTO (fd->inode, err);
- VALIDATE_OR_GOTO (this->private, err);
+ gf_uuid_copy(loc.gfid, orig_entry->d_stat.ia_gfid);
+ loc.inode = inode_ref(orig_entry->inode);
- conf = this->private;
+ if (is_revalidate(&loc)) {
+ goto out;
+ }
- GF_IF_INTERNAL_XATTR_GOTO (conf->wild_xattr_name, xattr,
- op_errno, err);
+ layout = dht_layout_new(this, 1);
+ if (!layout)
+ goto out;
- local = dht_local_init (frame, NULL, fd, GF_FOP_FSETXATTR);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
+ ret = dht_layout_merge(this, layout, subvol, 0, 0, orig_entry->dict);
+ if (!ret) {
+ ret = dht_layout_normalize(this, &loc, layout);
+ if (ret == 0) {
+ dht_layout_set(this, orig_entry->inode, layout);
+ entry->inode = inode_ref(orig_entry->inode);
+ layout = NULL;
}
+ }
- subvol = local->cached_subvol;
- if (!subvol) {
- gf_msg_debug (this->name, 0,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+ if (layout)
+ dht_layout_unref(this, layout);
- local->call_cnt = 1;
+out:
+ loc_wipe(&loc);
+ return;
+}
- STACK_WIND (frame, dht_err_cbk, subvol, subvol->fops->fsetxattr,
- fd, xattr, flags, NULL);
+/* Posix returns op_errno = ENOENT to indicate that there are no more
+ * entries
+ */
+static int
+dht_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, gf_dirent_t *orig_entries, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ gf_dirent_t entries;
+ gf_dirent_t *orig_entry = NULL;
+ gf_dirent_t *entry = NULL;
+ xlator_t *prev = NULL;
+ xlator_t *next_subvol = NULL;
+ off_t next_offset = 0;
+ int count = 0;
+ dht_layout_t *layout = NULL;
+ dht_conf_t *conf = NULL;
+ dht_methods_t *methods = NULL;
+ xlator_t *subvol = 0;
+ xlator_t *hashed_subvol = 0;
+ int ret = 0;
+ int readdir_optimize = 0;
+ inode_table_t *itable = NULL;
+ inode_t *inode = NULL;
+ gf_boolean_t skip_hashed_check = _gf_false;
+
+ INIT_LIST_HEAD(&entries.list);
+
+ prev = cookie;
+ local = frame->local;
+ GF_VALIDATE_OR_GOTO(this->name, local->fd, unwind);
+
+ itable = local->fd->inode->table;
+
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, conf, unwind);
+
+ methods = &(conf->methods);
+
+ if (op_ret <= 0) {
+ goto done;
+ }
+
+ /* Why aren't we skipping DHT entirely in case of a single subvol?
+ * Because if this was a larger volume earlier and all but one subvol
+ * was removed, there might be stale linkto files on the subvol.
+ */
+ if (conf->subvolume_cnt == 1) {
+ /* return all directory and file entries except
+ * linkto files for a single child DHT
+ */
+ skip_hashed_check = _gf_true;
+ }
- return 0;
+ if (!local->layout)
+ local->layout = dht_layout_get(this, local->fd->inode);
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (fsetxattr, frame, -1, op_errno, NULL);
+ layout = local->layout;
- return 0;
-}
+ /* This will skip the entries on the subvol without a layout,
+ * hence preventing the crash but rmdir might fail with
+ * "directory not empty" errors*/
+ if (layout == NULL)
+ goto done;
-static int
-dht_common_setxattr_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *xdata)
-{
- DHT_STACK_UNWIND (setxattr, frame, op_ret, op_errno, xdata);
+ if (conf->readdir_optimize == _gf_true)
+ readdir_optimize = 1;
- return 0;
-}
+ gf_msg_debug(this->name, 0, "Processing entries from %s", prev->name);
-int
-dht_checking_pathinfo_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xattr,
- dict_t *xdata)
-{
- int i = -1;
- int ret = -1;
- char *value = NULL;
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- call_frame_t *prev = NULL;
- int this_call_cnt = 0;
+ list_for_each_entry(orig_entry, (&orig_entries->list), list)
+ {
+ next_offset = orig_entry->d_off;
- local = frame->local;
- prev = cookie;
- conf = this->private;
+ gf_msg_debug(this->name, 0, "%s: entry = %s, type = %d", prev->name,
+ orig_entry->d_name, orig_entry->d_type);
- if (op_ret == -1)
- goto out;
+ if (IA_ISINVAL(orig_entry->d_stat.ia_type)) {
+ /*stat failed somewhere- display this entry but the data may
+ * be inaccurate.
+ */
+ gf_msg_debug(this->name, EINVAL, "Invalid stat for %s (gfid %s)",
+ orig_entry->d_name,
+ uuid_utoa(orig_entry->d_stat.ia_gfid));
+ }
+ if (check_is_linkfile(NULL, (&orig_entry->d_stat), orig_entry->dict,
+ conf->link_xattr_name)) {
+ gf_msg_debug(this->name, 0, "%s: %s is a linkto file", prev->name,
+ orig_entry->d_name);
+ continue;
+ }
- ret = dict_get_str (xattr, GF_XATTR_PATHINFO_KEY, &value);
- if (ret)
- goto out;
+ if (skip_hashed_check) {
+ goto list;
+ }
- if (!strcmp (value, local->key)) {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (conf->subvolumes[i] == prev->this)
- conf->decommissioned_bricks[i] = prev->this;
- }
+ if (check_is_dir(NULL, (&orig_entry->d_stat), NULL)) {
+ /*Directory entries filtering :
+ * a) If rebalance is running, pick from first_up_subvol
+ * b) (rebalance not running)hashed subvolume is NULL or
+ * down then filter in first_up_subvolume. Other wise the
+ * corresponding hashed subvolume will take care of the
+ * directory entry.
+ */
+ if (readdir_optimize) {
+ if (prev == local->first_up_subvol)
+ goto list;
+ else
+ continue;
+ }
+
+ hashed_subvol = methods->layout_search(this, layout,
+ orig_entry->d_name);
+
+ if (prev == hashed_subvol)
+ goto list;
+ if ((hashed_subvol && dht_subvol_status(conf, hashed_subvol)) ||
+ (prev != local->first_up_subvol))
+ continue;
+
+ goto list;
}
-out:
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- DHT_STACK_UNWIND (setxattr, frame, local->op_ret, ENOTSUP, NULL);
+ list:
+ entry = gf_dirent_for_name(orig_entry->d_name);
+ if (!entry) {
+ goto unwind;
}
- return 0;
-}
+ /* Do this if conf->search_unhashed is set to "auto" */
+ if (conf->search_unhashed == GF_DHT_LOOKUP_UNHASHED_AUTO) {
+ subvol = methods->layout_search(this, layout, orig_entry->d_name);
+ if (!subvol || (subvol != prev)) {
+ /* TODO: Count the number of entries which need
+ linkfile to prove its existence in fs */
+ layout->search_unhashed++;
+ }
+ }
-int
-dht_setxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *xattr, int flags, dict_t *xdata)
-{
- xlator_t *subvol = NULL;
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- dht_methods_t *methods = NULL;
- dht_layout_t *layout = NULL;
- int i = 0;
- int op_errno = EINVAL;
- int ret = -1;
- data_t *tmp = NULL;
- uint32_t dir_spread = 0;
- char value[4096] = {0,};
- gf_dht_migrate_data_type_t forced_rebalance = GF_DHT_MIGRATE_DATA;
- int call_cnt = 0;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
-
- conf = this->private;
- GF_VALIDATE_OR_GOTO (this->name, conf, err);
-
- methods = conf->methods;
- GF_VALIDATE_OR_GOTO (this->name, conf->methods, err);
-
- GF_IF_INTERNAL_XATTR_GOTO (conf->wild_xattr_name, xattr,
- op_errno, err);
-
- local = dht_local_init (frame, loc, NULL, GF_FOP_SETXATTR);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
+ entry->d_off = orig_entry->d_off;
+ entry->d_stat = orig_entry->d_stat;
+ entry->d_ino = orig_entry->d_ino;
+ entry->d_type = orig_entry->d_type;
+ entry->d_len = orig_entry->d_len;
+
+ if (orig_entry->dict)
+ entry->dict = dict_ref(orig_entry->dict);
+
+ /* making sure we set the inode ctx right with layout,
+ currently possible only for non-directories, so for
+ directories don't set entry inodes */
+ if (IA_ISDIR(entry->d_stat.ia_type)) {
+ entry->d_stat.ia_blocks = DHT_DIR_STAT_BLOCKS;
+ entry->d_stat.ia_size = DHT_DIR_STAT_SIZE;
+ if (orig_entry->inode) {
+ dht_inode_ctx_time_update(orig_entry->inode, this,
+ &entry->d_stat, 1);
+
+ if (conf->subvolume_cnt == 1) {
+ dht_populate_inode_for_dentry(this, prev, entry,
+ orig_entry);
+ }
+ }
+ } else {
+ if (orig_entry->dict &&
+ dict_get(orig_entry->dict, conf->link_xattr_name)) {
+ /* Strip out the S and T flags set by rebalance*/
+ DHT_STRIP_PHASE1_FLAGS(&entry->d_stat);
+ }
+
+ if (orig_entry->inode) {
+ ret = dht_layout_preset(this, prev, orig_entry->inode);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_LAYOUT_SET_FAILED,
+ "failed to link the layout "
+ "in inode for %s",
+ orig_entry->d_name);
+
+ entry->inode = inode_ref(orig_entry->inode);
+ } else if (itable) {
+ /*
+ * orig_entry->inode might be null if any upper
+ * layer xlators below client set to null, to
+ * force a lookup on the inode even if the inode
+ * is present in the inode table. In that case
+ * we just update the ctx to make sure we didn't
+ * missed anything.
+ */
+ inode = inode_find(itable, orig_entry->d_stat.ia_gfid);
+ if (inode) {
+ ret = dht_layout_preset(this, prev, inode);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_LAYOUT_SET_FAILED,
+ "failed to link the layout"
+ " in inode for %s",
+ orig_entry->d_name);
+ inode_unref(inode);
+ inode = NULL;
+ }
+ }
}
- subvol = local->cached_subvol;
- if (!subvol) {
- gf_msg_debug (this->name, 0,
- "no cached subvolume for path=%s",
- loc->path);
- op_errno = EINVAL;
- goto err;
+ gf_msg_debug(this->name, 0, "%s: Adding entry = %s", prev->name,
+ entry->d_name);
+
+ list_add_tail(&entry->list, &entries.list);
+ count++;
+ }
+
+done:
+
+ /* We need to ensure that only the last subvolume's end-of-directory
+ * notification is respected so that directory reading does not stop
+ * before all subvolumes have been read. That could happen because the
+ * posix for each subvolume sends a ENOENT on end-of-directory but in
+ * distribute we're not concerned only with a posix's view of the
+ * directory but the aggregated namespace' view of the directory.
+ * Possible values:
+ * op_ret == 0 and op_errno != 0
+ * if op_errno != ENOENT : Error.Unwind.
+ * if op_errno == ENOENT : There are no more entries on this subvol.
+ * Move to the next one.
+ * op_ret > 0 and count == 0 :
+ * The subvol returned entries to dht but all were stripped out.
+ * For example, if they were linkto files or dirs where
+ * hashed_subvol != prev. Try to get some entries by winding
+ * to the next subvol. This can be dangerous if parallel readdir
+ * is enabled as it grows the stack.
+ *
+ * op_ret > 0 and count > 0:
+ * We found some entries. Unwind even if the buffer is not full.
+ *
+ */
+
+ op_ret = count;
+ if (count == 0) {
+ /* non-zero next_offset means that
+ * EOF is not yet hit on the current subvol
+ */
+ if ((next_offset == 0) || (op_errno == ENOENT)) {
+ next_offset = 0;
+ next_subvol = dht_subvol_next(this, prev);
+ } else {
+ next_subvol = prev;
}
- layout = local->layout;
- if (!layout) {
- gf_msg_debug (this->name, 0,
- "no layout for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
+ if (!next_subvol) {
+ goto unwind;
}
- local->call_cnt = call_cnt = layout->cnt;
+ if (conf->readdir_optimize == _gf_true) {
+ if (next_subvol != local->first_up_subvol) {
+ ret = dict_set_int32(local->xattr, GF_READDIR_SKIP_DIRS, 1);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value"
+ ":key = %s",
+ GF_READDIR_SKIP_DIRS);
+ } else {
+ dict_del(local->xattr, GF_READDIR_SKIP_DIRS);
+ }
+ }
+
+ STACK_WIND_COOKIE(frame, dht_readdirp_cbk, next_subvol, next_subvol,
+ next_subvol->fops->readdirp, local->fd, local->size,
+ next_offset, local->xattr);
+ return 0;
+ }
- tmp = dict_get (xattr, GF_XATTR_FILE_MIGRATE_KEY);
- if (tmp) {
+unwind:
+ /* We need to ensure that only the last subvolume's end-of-directory
+ * notification is respected so that directory reading does not stop
+ * before all subvolumes have been read. That could happen because the
+ * posix for each subvolume sends a ENOENT on end-of-directory but in
+ * distribute we're not concerned only with a posix's view of the
+ * directory but the aggregated namespace' view of the directory.
+ */
+ if (op_ret < 0)
+ op_ret = 0;
+
+ if (prev != dht_last_up_subvol(this))
+ op_errno = 0;
+
+ DHT_STACK_UNWIND(readdirp, frame, op_ret, op_errno, &entries, NULL);
+
+ gf_dirent_free(&entries);
+ return 0;
+}
- if (IA_ISDIR (loc->inode->ia_type)) {
- op_errno = ENOTSUP;
- goto err;
- }
+static int
+dht_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, gf_dirent_t *orig_entries, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ gf_dirent_t entries;
+ gf_dirent_t *orig_entry = NULL;
+ gf_dirent_t *entry = NULL;
+ xlator_t *prev = NULL;
+ xlator_t *next_subvol = NULL;
+ off_t next_offset = 0;
+ int count = 0;
+ dht_layout_t *layout = 0;
+ xlator_t *subvol = 0;
+ dht_conf_t *conf = NULL;
+ dht_methods_t *methods = NULL;
+ gf_boolean_t skip_hashed_check = _gf_false;
- /* TODO: need to interpret the 'value' for more meaning
- (ie, 'target' subvolume given there, etc) */
- memcpy (value, tmp->data, tmp->len);
- if (strcmp (value, "force") == 0)
- forced_rebalance =
- GF_DHT_MIGRATE_DATA_EVEN_IF_LINK_EXISTS;
+ INIT_LIST_HEAD(&entries.list);
- if (conf->decommission_in_progress)
- forced_rebalance = GF_DHT_MIGRATE_HARDLINK;
+ prev = cookie;
+ local = frame->local;
- if (!loc->path) {
- op_errno = EINVAL;
- goto err;
- }
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, conf, done);
- if (!local->loc.name)
- local->loc.name = strrchr (local->loc.path, '/')+1;
+ methods = &(conf->methods);
- if (!local->loc.parent)
- local->loc.parent =
- inode_parent(local->loc.inode, NULL, NULL);
+ if (op_ret <= 0)
+ goto done;
- if ((!local->loc.name) || (!local->loc.parent)) {
- op_errno = EINVAL;
- goto err;
- }
+ if (!local->layout)
+ local->layout = dht_layout_get(this, local->fd->inode);
- methods->migration_get_dst_subvol(this, local);
+ layout = local->layout;
- if (!local->rebalance.target_node) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_HASHED_SUBVOL_GET_FAILED,
- "Failed to get hashed subvol for %s",
- loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ gf_msg_debug(this->name, 0, "Processing entries from %s", prev->name);
- local->rebalance.from_subvol = local->cached_subvol;
+ if (conf->subvolume_cnt == 1) {
+ /*return everything*/
+ skip_hashed_check = _gf_true;
+ count = op_ret;
+ goto done;
+ }
- if (local->rebalance.target_node == local->rebalance.from_subvol) {
- op_errno = EEXIST;
- goto err;
- }
- if (local->rebalance.target_node) {
- local->flags = forced_rebalance;
+ list_for_each_entry(orig_entry, (&orig_entries->list), list)
+ {
+ next_offset = orig_entry->d_off;
- ret = dht_start_rebalance_task (this, frame);
- if (!ret)
- return 0;
+ gf_msg_debug(this->name, 0, "%s: entry = %s, type = %d", prev->name,
+ orig_entry->d_name, orig_entry->d_type);
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_REBALANCE_START_FAILED,
- "%s: failed to create a new rebalance synctask",
- loc->path);
- }
- op_errno = EINVAL;
- goto err;
+ subvol = methods->layout_search(this, layout, orig_entry->d_name);
- }
+ if (!subvol || (subvol == prev)) {
+ entry = gf_dirent_for_name(orig_entry->d_name);
+ if (!entry) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
+ "Memory allocation failed ");
+ goto unwind;
+ }
- tmp = dict_get (xattr, "decommission-brick");
- if (tmp) {
- /* This operation should happen only on '/' */
- if (!__is_root_gfid (loc->inode->gfid)) {
- op_errno = ENOTSUP;
- goto err;
- }
+ entry->d_off = orig_entry->d_off;
+ entry->d_ino = orig_entry->d_ino;
+ entry->d_type = orig_entry->d_type;
+ entry->d_len = orig_entry->d_len;
- memcpy (value, tmp->data, ((tmp->len < 4095) ? tmp->len : 4095));
- local->key = gf_strdup (value);
- local->call_cnt = conf->subvolume_cnt;
+ gf_msg_debug(this->name, 0, "%s: Adding = entry %s", prev->name,
+ entry->d_name);
- for (i = 0 ; i < conf->subvolume_cnt; i++) {
- /* Get the pathinfo, and then compare */
- STACK_WIND (frame, dht_checking_pathinfo_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->getxattr,
- loc, GF_XATTR_PATHINFO_KEY, NULL);
- }
- return 0;
+ list_add_tail(&entry->list, &entries.list);
+ count++;
+ }
+ }
+done:
+ op_ret = count;
+ /* We need to ensure that only the last subvolume's end-of-directory
+ * notification is respected so that directory reading does not stop
+ * before all subvolumes have been read. That could happen because the
+ * posix for each subvolume sends a ENOENT on end-of-directory but in
+ * distribute we're not concerned only with a posix's view of the
+ * directory but the aggregated namespace' view of the directory.
+ */
+ if (count == 0) {
+ if ((next_offset == 0) || (op_errno == ENOENT)) {
+ next_offset = 0;
+ next_subvol = dht_subvol_next(this, prev);
+ } else {
+ next_subvol = prev;
}
- tmp = dict_get (xattr, GF_XATTR_FIX_LAYOUT_KEY);
- if (tmp) {
- gf_log (this->name, GF_LOG_INFO,
- "fixing the layout of %s", loc->path);
+ if (!next_subvol) {
+ goto unwind;
+ }
- ret = dht_fix_directory_layout (frame, dht_common_setxattr_cbk,
- layout);
- if (ret) {
- op_errno = ENOTCONN;
- goto err;
+ STACK_WIND_COOKIE(frame, dht_readdir_cbk, next_subvol, next_subvol,
+ next_subvol->fops->readdir, local->fd, local->size,
+ next_offset, NULL);
+ return 0;
+ }
+
+unwind:
+ /* We need to ensure that only the last subvolume's end-of-directory
+ * notification is respected so that directory reading does not stop
+ * before all subvolumes have been read. That could happen because the
+ * posix for each subvolume sends a ENOENT on end-of-directory but in
+ * distribute we're not concerned only with a posix's view of the
+ * directory but the aggregated namespace' view of the directory.
+ */
+
+ if (prev != dht_last_up_subvol(this))
+ op_errno = 0;
+
+ if (!skip_hashed_check) {
+ DHT_STACK_UNWIND(readdir, frame, op_ret, op_errno, &entries, NULL);
+ gf_dirent_free(&entries);
+
+ } else {
+ DHT_STACK_UNWIND(readdir, frame, op_ret, op_errno, orig_entries, NULL);
+ }
+ return 0;
+}
+
+static int
+dht_do_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t yoff, int whichop, dict_t *dict)
+{
+ dht_local_t *local = NULL;
+ int op_errno = -1;
+ xlator_t *xvol = NULL;
+ int ret = 0;
+ dht_conf_t *conf = NULL;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
+ VALIDATE_OR_GOTO(this->private, err);
+
+ conf = this->private;
+
+ local = dht_local_init(frame, NULL, NULL, whichop);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->fd = fd_ref(fd);
+ local->size = size;
+ local->xattr_req = (dict) ? dict_ref(dict) : NULL;
+ local->first_up_subvol = dht_first_up_subvol(this);
+ local->op_ret = -1;
+
+ dht_deitransform(this, yoff, &xvol);
+
+ /* TODO: do proper readdir */
+ if (whichop == GF_FOP_READDIRP) {
+ if (dict)
+ local->xattr = dict_ref(dict);
+ else
+ local->xattr = dict_new();
+
+ if (local->xattr) {
+ ret = dict_set_uint32(local->xattr, conf->link_xattr_name, 256);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value"
+ " : key = %s",
+ conf->link_xattr_name);
+
+ if (conf->readdir_optimize == _gf_true) {
+ if (xvol != local->first_up_subvol) {
+ ret = dict_set_int32(local->xattr, GF_READDIR_SKIP_DIRS, 1);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_DICT_SET_FAILED,
+ "Failed to set "
+ "dictionary value: "
+ "key = %s",
+ GF_READDIR_SKIP_DIRS);
+ } else {
+ dict_del(local->xattr, GF_READDIR_SKIP_DIRS);
}
- return ret;
- }
+ }
- tmp = dict_get (xattr, "distribute.directory-spread-count");
- if (tmp) {
- /* Setxattr value is packed as 'binary', not string */
- memcpy (value, tmp->data, ((tmp->len < 4095)?tmp->len:4095));
- ret = gf_string2uint32 (value, &dir_spread);
- if (!ret && ((dir_spread <= conf->subvolume_cnt) &&
- (dir_spread > 0))) {
- layout->spread_cnt = dir_spread;
-
- ret = dht_fix_directory_layout (frame,
- dht_common_setxattr_cbk,
- layout);
- if (ret) {
- op_errno = ENOTCONN;
- goto err;
- }
- return ret;
+ if (conf->subvolume_cnt == 1) {
+ ret = dict_set_uint32(local->xattr, conf->xattr_name, 4 * 4);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM,
+ DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary "
+ "value:key = %s ",
+ conf->xattr_name);
}
- gf_log (this->name, GF_LOG_ERROR,
- "wrong 'directory-spread-count' value (%s)", value);
- op_errno = ENOTSUP;
- goto err;
+ }
}
- for (i = 0; i < call_cnt; i++) {
- STACK_WIND (frame, dht_err_cbk,
- layout->list[i].xlator,
- layout->list[i].xlator->fops->setxattr,
- loc, xattr, flags, xdata);
- }
+ STACK_WIND_COOKIE(frame, dht_readdirp_cbk, xvol, xvol,
+ xvol->fops->readdirp, fd, size, yoff, local->xattr);
+ } else {
+ STACK_WIND_COOKIE(frame, dht_readdir_cbk, xvol, xvol,
+ xvol->fops->readdir, fd, size, yoff, local->xattr);
+ }
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(readdir, frame, -1, op_errno, NULL, NULL);
- return 0;
+ return 0;
}
-
int
-dht_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xdata)
+dht_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t yoff, dict_t *xdata)
{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
+ int op = GF_FOP_READDIR;
+ dht_conf_t *conf = NULL;
+ int i = 0;
- local = frame->local;
- prev = cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_msg_debug (this->name, 0,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
- goto unlock;
- }
+ conf = this->private;
+ if (!conf)
+ goto out;
- local->op_ret = 0;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (!conf->subvolume_status[i]) {
+ op = GF_FOP_READDIRP;
+ break;
}
-unlock:
- UNLOCK (&frame->lock);
+ }
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- DHT_STACK_UNWIND (removexattr, frame, local->op_ret,
- local->op_errno, NULL);
- }
+ if (conf->use_readdirp)
+ op = GF_FOP_READDIRP;
- return 0;
+out:
+ dht_do_readdir(frame, this, fd, size, yoff, op, 0);
+ return 0;
}
-
int
-dht_removexattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *key, dict_t *xdata)
+dht_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t yoff, dict_t *dict)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
- dht_layout_t *layout = NULL;
- int call_cnt = 0;
- dht_conf_t *conf = NULL;
- int i;
+ dht_do_readdir(frame, this, fd, size, yoff, GF_FOP_READDIRP, dict);
+ return 0;
+}
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (this->private, err);
+static int
+dht_fsyncdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
- conf = this->private;
+ local = frame->local;
- GF_IF_NATIVE_XATTR_GOTO (conf->wild_xattr_name, key, op_errno, err);
+ LOCK(&frame->lock);
+ {
+ if (op_ret == -1)
+ local->op_errno = op_errno;
+ else if (op_ret == 0)
+ local->op_ret = 0;
+ }
+ UNLOCK(&frame->lock);
+ this_call_cnt = dht_frame_return(frame);
+ if (is_last_call(this_call_cnt))
+ DHT_STACK_UNWIND(fsyncdir, frame, local->op_ret, local->op_errno,
+ xdata);
+
+ return 0;
+}
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
+int
+dht_fsyncdir(call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync,
+ dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int op_errno = -1;
+ int i = -1;
- local = dht_local_init (frame, loc, NULL, GF_FOP_REMOVEXATTR);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
+ VALIDATE_OR_GOTO(this->private, err);
- subvol = local->cached_subvol;
- if (!subvol) {
- gf_msg_debug (this->name, 0,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ conf = this->private;
- layout = local->layout;
- if (!local->layout) {
- gf_msg_debug (this->name, 0,
- "no layout for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ local = dht_local_init(frame, NULL, NULL, GF_FOP_FSYNCDIR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- local->call_cnt = call_cnt = layout->cnt;
- local->key = gf_strdup (key);
+ local->fd = fd_ref(fd);
+ local->call_cnt = conf->subvolume_cnt;
- for (i = 0; i < call_cnt; i++) {
- STACK_WIND (frame, dht_removexattr_cbk,
- layout->list[i].xlator,
- layout->list[i].xlator->fops->removexattr,
- loc, key, NULL);
- }
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ STACK_WIND(frame, dht_fsyncdir_cbk, conf->subvolumes[i],
+ conf->subvolumes[i]->fops->fsyncdir, fd, datasync, xdata);
+ }
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(fsyncdir, frame, -1, op_errno, NULL);
- return 0;
+ return 0;
}
int
-dht_fremovexattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, const char *key, dict_t *xdata)
+dht_newfile_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
- dht_layout_t *layout = NULL;
- int call_cnt = 0;
- dht_conf_t *conf = 0;
-
- int i;
+ xlator_t *prev = NULL;
+ int ret = -1;
+ dht_local_t *local = NULL;
+
+ if (op_ret == -1)
+ goto out;
+
+ local = frame->local;
+ if (!local) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ prev = cookie;
+
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update(local->loc.parent, this, preparent, 0);
+ dht_inode_ctx_time_update(local->loc.parent, this, postparent, 1);
+ }
+
+ ret = dht_layout_preset(this, prev, inode);
+ if (ret < 0) {
+ gf_msg_debug(this->name, EINVAL,
+ "could not set pre-set layout for subvolume %s",
+ prev ? prev->name : NULL);
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+ if (local->linked == _gf_true)
+ dht_linkfile_attr_heal(frame, this);
+out:
+ /*
+ * FIXME: ia_size and st_blocks of preparent and postparent do not have
+ * correct values. since, preparent and postparent buffers correspond
+ * to a directory these two members should have values equal to sum of
+ * corresponding values from each of the subvolume.
+ * See dht_iatt_merge for reference.
+ */
+ DHT_STRIP_PHASE1_FLAGS(stbuf);
+ dht_set_fixed_dir_stat(postparent);
+ dht_set_fixed_dir_stat(preparent);
+
+ if (local && local->lock[0].layout.parent_layout.locks) {
+ /* store op_errno for failure case*/
+ local->op_errno = op_errno;
+ local->refresh_layout_unlock(frame, this, op_ret, 1);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (this->private, err);
+ if (op_ret == 0) {
+ DHT_STACK_UNWIND(mknod, frame, op_ret, op_errno, inode, stbuf,
+ preparent, postparent, xdata);
+ }
+ } else {
+ DHT_STACK_UNWIND(mknod, frame, op_ret, op_errno, inode, stbuf,
+ preparent, postparent, xdata);
+ }
- conf = this->private;
+ return 0;
+}
- GF_IF_NATIVE_XATTR_GOTO (conf->wild_xattr_name, key, op_errno, err);
+static int
+dht_mknod_linkfile_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ xlator_t *cached_subvol = NULL;
+ dht_conf_t *conf = NULL;
- VALIDATE_OR_GOTO (frame, err);
+ local = frame->local;
- local = dht_local_init (frame, NULL, fd, GF_FOP_FREMOVEXATTR);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ if (!local || !local->cached_subvol) {
+ op_errno = EINVAL;
+ goto err;
+ }
- subvol = local->cached_subvol;
- if (!subvol) {
- gf_msg_debug (this->name, 0,
- "no cached subvolume for inode=%s",
- uuid_utoa (fd->inode->gfid));
- op_errno = EINVAL;
- goto err;
- }
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ goto err;
+ }
- layout = local->layout;
- if (!local->layout) {
- gf_msg_debug (this->name, 0,
- "no layout for inode=%s",
- uuid_utoa (fd->inode->gfid));
- op_errno = EINVAL;
- goto err;
- }
+ conf = this->private;
+ if (!conf) {
+ local->op_errno = EINVAL;
+ op_errno = EINVAL;
+ goto err;
+ }
- local->call_cnt = call_cnt = layout->cnt;
- local->key = gf_strdup (key);
+ cached_subvol = local->cached_subvol;
- for (i = 0; i < call_cnt; i++) {
- STACK_WIND (frame, dht_removexattr_cbk,
- layout->list[i].xlator,
- layout->list[i].xlator->fops->fremovexattr,
- fd, key, NULL);
- }
+ if (local->params) {
+ dict_del(local->params, conf->link_xattr_name);
+ dict_del(local->params, GLUSTERFS_INTERNAL_FOP_KEY);
+ }
- return 0;
+ STACK_WIND_COOKIE(frame, dht_newfile_cbk, (void *)cached_subvol,
+ cached_subvol, cached_subvol->fops->mknod, &local->loc,
+ local->mode, local->rdev, local->umask, local->params);
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (fremovexattr, frame, -1, op_errno, NULL);
-
- return 0;
+ if (local && local->lock[0].layout.parent_layout.locks) {
+ local->refresh_layout_unlock(frame, this, -1, 1);
+ } else {
+ DHT_STACK_UNWIND(mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL);
+ }
+ return 0;
}
-
-int
-dht_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, fd_t *fd, dict_t *xdata)
+static int
+dht_mknod_wind_to_avail_subvol(call_frame_t *frame, xlator_t *this,
+ xlator_t *subvol, loc_t *loc, dev_t rdev,
+ mode_t mode, mode_t umask, dict_t *params)
{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *avail_subvol = NULL;
- local = frame->local;
- prev = cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_msg_debug (this->name, 0,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
- goto unlock;
- }
+ local = frame->local;
- local->op_ret = 0;
- }
-unlock:
- UNLOCK (&frame->lock);
+ if (!dht_is_subvol_filled(this, subvol)) {
+ gf_msg_debug(this->name, 0, "creating %s on %s", loc->path,
+ subvol->name);
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt))
- DHT_STACK_UNWIND (open, frame, local->op_ret, local->op_errno,
- local->fd, NULL);
+ STACK_WIND_COOKIE(frame, dht_newfile_cbk, (void *)subvol, subvol,
+ subvol->fops->mknod, loc, mode, rdev, umask, params);
+ } else {
+ avail_subvol = dht_free_disk_available_subvol(this, subvol, local);
- return 0;
-}
+ if (avail_subvol != subvol) {
+ local->params = dict_ref(params);
+ local->rdev = rdev;
+ local->mode = mode;
+ local->umask = umask;
+ local->cached_subvol = avail_subvol;
+ local->hashed_subvol = subvol;
-/*
- * dht_normalize_stats -
- */
-static void
-dht_normalize_stats (struct statvfs *buf, unsigned long bsize,
- unsigned long frsize)
-{
- double factor = 0;
+ gf_msg_debug(this->name, 0, "creating %s on %s (link at %s)",
+ loc->path, avail_subvol->name, subvol->name);
- if (buf->f_bsize != bsize) {
- buf->f_bsize = bsize;
+ dht_linkfile_create(frame, dht_mknod_linkfile_create_cbk, this,
+ avail_subvol, subvol, loc);
+
+ goto out;
}
- if (buf->f_frsize != frsize) {
- factor = ((double) buf->f_frsize) / frsize;
- buf->f_frsize = frsize;
- buf->f_blocks = (fsblkcnt_t) (factor * buf->f_blocks);
- buf->f_bfree = (fsblkcnt_t) (factor * buf->f_bfree);
- buf->f_bavail = (fsblkcnt_t) (factor * buf->f_bavail);
+ gf_msg_debug(this->name, 0, "creating %s on %s", loc->path,
+ subvol->name);
- }
+ STACK_WIND_COOKIE(frame, dht_newfile_cbk, (void *)subvol, subvol,
+ subvol->fops->mknod, loc, mode, rdev, umask, params);
+ }
+out:
+ return 0;
}
-int
-dht_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct statvfs *statvfs,
- dict_t *xdata)
+static int32_t
+dht_mknod_do(call_frame_t *frame)
{
+ dht_local_t *local = NULL;
+ dht_layout_t *refreshed = NULL;
+ xlator_t *subvol = NULL;
+ xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+ dht_methods_t *methods = NULL;
- gf_boolean_t event = _gf_false;
- qdstatfs_action_t action = qdstatfs_action_OFF;
- dht_local_t * local = NULL;
- int this_call_cnt = 0;
- int bsize = 0;
- int frsize = 0;
- GF_UNUSED int ret = 0;
- unsigned long new_usage = 0;
- unsigned long cur_usage = 0;
+ local = frame->local;
- local = frame->local;
- GF_ASSERT (local);
+ this = THIS;
- if (xdata)
- ret = dict_get_int8 (xdata, "quota-deem-statfs",
- (int8_t *)&event);
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- goto unlock;
- }
- if (!statvfs) {
- op_errno = EINVAL;
- local->op_ret = -1;
- goto unlock;
- }
- local->op_ret = 0;
+ conf = this->private;
- switch (local->quota_deem_statfs) {
- case _gf_true:
- if (event == _gf_true)
- action = qdstatfs_action_COMPARE;
- else
- action = qdstatfs_action_NEGLECT;
- break;
-
- case _gf_false:
- if (event == _gf_true) {
- action = qdstatfs_action_REPLACE;
- local->quota_deem_statfs = _gf_true;
- }
- break;
+ GF_VALIDATE_OR_GOTO(this->name, conf, err);
- default:
- gf_log (this->name, GF_LOG_ERROR, "Encountered third "
- "value for boolean variable %d",
- local->quota_deem_statfs);
- break;
- }
+ methods = &(conf->methods);
- if (local->quota_deem_statfs) {
- switch (action) {
- case qdstatfs_action_NEGLECT:
- goto unlock;
+ /* We don't need parent_loc anymore */
+ loc_wipe(&local->loc);
- case qdstatfs_action_REPLACE:
- local->statvfs = *statvfs;
- goto unlock;
+ loc_copy(&local->loc, &local->loc2);
- case qdstatfs_action_COMPARE:
- new_usage = statvfs->f_blocks -
- statvfs->f_bfree;
- cur_usage = local->statvfs.f_blocks -
- local->statvfs.f_bfree;
+ loc_wipe(&local->loc2);
- /* Take the max of the usage from subvols */
- if (new_usage >= cur_usage)
- local->statvfs = *statvfs;
- goto unlock;
+ refreshed = local->selfheal.refreshed_layout;
- default:
- break;
- }
- }
+ subvol = methods->layout_search(this, refreshed, local->loc.name);
- if (local->statvfs.f_bsize != 0) {
- bsize = max(local->statvfs.f_bsize, statvfs->f_bsize);
- frsize = max(local->statvfs.f_frsize, statvfs->f_frsize);
- dht_normalize_stats(&local->statvfs, bsize, frsize);
- dht_normalize_stats(statvfs, bsize, frsize);
- } else {
- local->statvfs.f_bsize = statvfs->f_bsize;
- local->statvfs.f_frsize = statvfs->f_frsize;
- }
+ if (!subvol) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ "no subvolume in "
+ "layout for path=%s",
+ local->loc.path);
+ local->op_errno = ENOENT;
+ goto err;
+ }
- local->statvfs.f_blocks += statvfs->f_blocks;
- local->statvfs.f_bfree += statvfs->f_bfree;
- local->statvfs.f_bavail += statvfs->f_bavail;
- local->statvfs.f_files += statvfs->f_files;
- local->statvfs.f_ffree += statvfs->f_ffree;
- local->statvfs.f_favail += statvfs->f_favail;
- local->statvfs.f_fsid = statvfs->f_fsid;
- local->statvfs.f_flag = statvfs->f_flag;
- local->statvfs.f_namemax = statvfs->f_namemax;
+ dht_mknod_wind_to_avail_subvol(frame, this, subvol, &local->loc,
+ local->rdev, local->mode, local->umask,
+ local->params);
+ return 0;
+err:
+ local->refresh_layout_unlock(frame, this, -1, 1);
+ return 0;
+}
- }
-unlock:
- UNLOCK (&frame->lock);
+static int32_t
+dht_mknod_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ DHT_STACK_DESTROY(frame);
+ return 0;
+}
+static int32_t
+dht_mknod_finish(call_frame_t *frame, xlator_t *this, int op_ret,
+ int invoke_cbk)
+{
+ dht_local_t *local = NULL, *lock_local = NULL;
+ call_frame_t *lock_frame = NULL;
+ int lock_count = 0;
+
+ local = frame->local;
+ lock_count = dht_lock_count(local->lock[0].layout.parent_layout.locks,
+ local->lock[0].layout.parent_layout.lk_count);
+ if (lock_count == 0)
+ goto done;
+
+ lock_frame = copy_frame(frame);
+ if (lock_frame == NULL) {
+ goto done;
+ }
+
+ lock_local = dht_local_init(lock_frame, &local->loc, NULL,
+ lock_frame->root->op);
+ if (lock_local == NULL) {
+ goto done;
+ }
+
+ lock_local->lock[0]
+ .layout.parent_layout.locks = local->lock[0].layout.parent_layout.locks;
+ lock_local->lock[0].layout.parent_layout.lk_count =
+ local->lock[0].layout.parent_layout.lk_count;
+
+ local->lock[0].layout.parent_layout.locks = NULL;
+ local->lock[0].layout.parent_layout.lk_count = 0;
+
+ dht_unlock_inodelk(lock_frame,
+ lock_local->lock[0].layout.parent_layout.locks,
+ lock_local->lock[0].layout.parent_layout.lk_count,
+ dht_mknod_unlock_cbk);
+ lock_frame = NULL;
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt))
- DHT_STACK_UNWIND (statfs, frame, local->op_ret, local->op_errno,
- &local->statvfs, xdata);
+done:
+ if (lock_frame != NULL) {
+ DHT_STACK_DESTROY(lock_frame);
+ }
+ if (op_ret == 0)
return 0;
-}
+ DHT_STACK_UNWIND(mknod, frame, op_ret, local->op_errno, NULL, NULL, NULL,
+ NULL, NULL);
+ return 0;
+}
-int
-dht_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+static int32_t
+dht_mknod_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- xlator_t *subvol = NULL;
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- int op_errno = -1;
- int i = -1;
+ dht_local_t *local = NULL;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (this->private, err);
+ local = frame->local;
- conf = this->private;
+ if (!local) {
+ goto err;
+ }
- local = dht_local_init (frame, NULL, NULL, GF_FOP_STATFS);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ if (op_ret < 0) {
+ gf_msg("DHT", GF_LOG_ERROR, 0, DHT_MSG_INODE_LK_ERROR,
+ "mknod lock failed for file: %s", local->loc2.name);
- if (!loc->inode || IA_ISDIR (loc->inode->ia_type)) {
- local->call_cnt = conf->subvolume_cnt;
-
- for (i = 0; i < conf->subvolume_cnt; i++) {
- STACK_WIND (frame, dht_statfs_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->statfs, loc,
- xdata);
- }
- return 0;
- }
+ local->op_errno = op_errno;
- subvol = dht_subvol_get_cached (this, loc->inode);
- if (!subvol) {
- gf_msg_debug (this->name, 0,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ goto err;
+ }
- local->call_cnt = 1;
+ local->refresh_layout_unlock = dht_mknod_finish;
- STACK_WIND (frame, dht_statfs_cbk,
- subvol, subvol->fops->statfs, loc, xdata);
+ local->refresh_layout_done = dht_mknod_do;
- return 0;
+ dht_refresh_layout(frame);
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (statfs, frame, -1, op_errno, NULL, NULL);
-
- return 0;
+ if (local)
+ dht_mknod_finish(frame, this, -1, 0);
+ else
+ DHT_STACK_UNWIND(mknod, frame, -1, EINVAL, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
}
-
-int
-dht_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
- dict_t *xdata)
+static int32_t
+dht_mknod_lock(call_frame_t *frame, xlator_t *subvol)
{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- int op_errno = -1;
- int i = -1;
+ dht_local_t *local = NULL;
+ int count = 1, ret = -1;
+ dht_lock_t **lk_array = NULL;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
- VALIDATE_OR_GOTO (this->private, err);
+ GF_VALIDATE_OR_GOTO("dht", frame, err);
+ GF_VALIDATE_OR_GOTO(frame->this->name, frame->local, err);
- conf = this->private;
+ local = frame->local;
- local = dht_local_init (frame, loc, fd, GF_FOP_OPENDIR);
- if (!local) {
- op_errno = ENOMEM;
+ lk_array = GF_CALLOC(count, sizeof(*lk_array), gf_common_mt_pointer);
- goto err;
- }
+ if (lk_array == NULL)
+ goto err;
- local->call_cnt = conf->subvolume_cnt;
+ lk_array[0] = dht_lock_new(frame->this, subvol, &local->loc, F_RDLCK,
+ DHT_LAYOUT_HEAL_DOMAIN, NULL,
+ IGNORE_ENOENT_ESTALE);
- for (i = 0; i < conf->subvolume_cnt; i++) {
- STACK_WIND (frame, dht_fd_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->opendir,
- loc, fd, xdata);
- }
+ if (lk_array[0] == NULL)
+ goto err;
- return 0;
-
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (opendir, frame, -1, op_errno, NULL, NULL);
+ local->lock[0].layout.parent_layout.locks = lk_array;
+ local->lock[0].layout.parent_layout.lk_count = count;
- return 0;
-}
+ ret = dht_blocking_inodelk(frame, lk_array, count, dht_mknod_lock_cbk);
-int
-dht_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, gf_dirent_t *orig_entries, dict_t *xdata)
-{
- dht_local_t *local = NULL;
- gf_dirent_t entries;
- gf_dirent_t *orig_entry = NULL;
- gf_dirent_t *entry = NULL;
- call_frame_t *prev = NULL;
- xlator_t *next_subvol = NULL;
- off_t next_offset = 0;
- int count = 0;
- dht_layout_t *layout = 0;
- dht_conf_t *conf = NULL;
- dht_methods_t *methods = NULL;
- xlator_t *subvol = 0;
- xlator_t *hashed_subvol = 0;
- int ret = 0;
- int readdir_optimize = 0;
-
- INIT_LIST_HEAD (&entries.list);
- prev = cookie;
- local = frame->local;
+ if (ret < 0) {
+ local->lock[0].layout.parent_layout.locks = NULL;
+ local->lock[0].layout.parent_layout.lk_count = 0;
+ goto err;
+ }
- conf = this->private;
- GF_VALIDATE_OR_GOTO(this->name, conf, unwind);
+ return 0;
+err:
+ if (lk_array != NULL) {
+ dht_lock_array_free(lk_array, count);
+ GF_FREE(lk_array);
+ }
- methods = conf->methods;
- GF_VALIDATE_OR_GOTO(this->name, conf->methods, done);
+ return -1;
+}
- if (op_ret < 0)
- goto done;
+static int
+dht_refresh_parent_layout_resume(call_frame_t *frame, xlator_t *this, int ret,
+ int invoke_cbk)
+{
+ dht_local_t *local = NULL, *parent_local = NULL;
+ call_stub_t *stub = NULL;
+ call_frame_t *parent_frame = NULL;
- if (!local->layout)
- local->layout = dht_layout_get (this, local->fd->inode);
+ local = frame->local;
- layout = local->layout;
+ stub = local->stub;
+ local->stub = NULL;
- if (conf->readdir_optimize == _gf_true)
- readdir_optimize = 1;
+ parent_frame = stub->frame;
+ parent_local = parent_frame->local;
- list_for_each_entry (orig_entry, (&orig_entries->list), list) {
- next_offset = orig_entry->d_off;
+ if (ret < 0) {
+ parent_local->op_ret = -1;
+ parent_local->op_errno = local->op_errno ? local->op_errno : EIO;
+ } else {
+ parent_local->op_ret = 0;
+ }
- if (IA_ISINVAL(orig_entry->d_stat.ia_type)) {
- /*stat failed somewhere- ignore this entry*/
- continue;
- }
+ call_resume(stub);
- if (check_is_dir (NULL, (&orig_entry->d_stat), NULL)) {
+ DHT_STACK_DESTROY(frame);
- /*Directory entries filtering :
- * a) If rebalance is running, pick from first_up_subvol
- * b) (rebalance not running)hashed subvolume is NULL or
- * down then filter in first_up_subvolume. Other wise the
- * corresponding hashed subvolume will take care of the
- * directory entry.
- */
+ return 0;
+}
- if (readdir_optimize) {
- if (prev->this == local->first_up_subvol)
- goto list;
- else
- continue;
+static int
+dht_refresh_parent_layout_done(call_frame_t *frame)
+{
+ dht_local_t *local = NULL;
+ int ret = 0;
- }
+ local = frame->local;
- hashed_subvol = methods->layout_search (this, layout,
- orig_entry->d_name);
+ if (local->op_ret < 0) {
+ ret = -1;
+ goto resume;
+ }
- if (prev->this == hashed_subvol)
- goto list;
- if ((hashed_subvol
- && dht_subvol_status (conf, hashed_subvol))
- ||(prev->this != local->first_up_subvol))
- continue;
+ dht_layout_set(frame->this, local->loc.inode,
+ local->selfheal.refreshed_layout);
- goto list;
- }
+resume:
+ dht_refresh_parent_layout_resume(frame, frame->this, ret, 1);
+ return 0;
+}
- if (check_is_linkfile (NULL, (&orig_entry->d_stat),
- orig_entry->dict,
- conf->link_xattr_name)) {
- continue;
- }
-list:
- entry = gf_dirent_for_name (orig_entry->d_name);
- if (!entry) {
+static int
+dht_handle_parent_layout_change(xlator_t *this, call_stub_t *stub)
+{
+ call_frame_t *refresh_frame = NULL, *frame = NULL;
+ dht_local_t *refresh_local = NULL, *local = NULL;
- goto unwind;
- }
+ frame = stub->frame;
+ local = frame->local;
- /* Do this if conf->search_unhashed is set to "auto" */
- if (conf->search_unhashed == GF_DHT_LOOKUP_UNHASHED_AUTO) {
- subvol = methods->layout_search (this, layout,
- orig_entry->d_name);
- if (!subvol || (subvol != prev->this)) {
- /* TODO: Count the number of entries which need
- linkfile to prove its existence in fs */
- layout->search_unhashed++;
- }
- }
+ refresh_frame = copy_frame(frame);
+ if (!refresh_frame) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
+ "mem allocation failed for refresh_frame");
+ return -1;
+ }
- entry->d_off = orig_entry->d_off;
- entry->d_stat = orig_entry->d_stat;
- entry->d_ino = orig_entry->d_ino;
- entry->d_type = orig_entry->d_type;
- entry->d_len = orig_entry->d_len;
-
- if (orig_entry->dict)
- entry->dict = dict_ref (orig_entry->dict);
-
- /* making sure we set the inode ctx right with layout,
- currently possible only for non-directories, so for
- directories don't set entry inodes */
- if (!IA_ISDIR(entry->d_stat.ia_type) && orig_entry->inode) {
- ret = dht_layout_preset (this, prev->this,
- orig_entry->inode);
- if (ret)
- gf_msg (this->name, GF_LOG_WARNING, 0,
- DHT_MSG_LAYOUT_SET_FAILED,
- "failed to link the layout in inode");
- entry->inode = inode_ref (orig_entry->inode);
- } else if (orig_entry->inode) {
- dht_inode_ctx_time_update (orig_entry->inode, this,
- &entry->d_stat, 1);
- }
+ refresh_local = dht_local_init(refresh_frame, NULL, NULL, stub->fop);
+ if (!refresh_local) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
+ "mem allocation failed for refresh_local");
+ return -1;
+ }
- list_add_tail (&entry->list, &entries.list);
- count++;
- }
- op_ret = count;
- /* We need to ensure that only the last subvolume's end-of-directory
- * notification is respected so that directory reading does not stop
- * before all subvolumes have been read. That could happen because the
- * posix for each subvolume sends a ENOENT on end-of-directory but in
- * distribute we're not concerned only with a posix's view of the
- * directory but the aggregated namespace' view of the directory.
- */
- if (prev->this != dht_last_up_subvol (this))
- op_errno = 0;
+ refresh_local->loc.inode = inode_ref(local->loc.parent);
+ gf_uuid_copy(refresh_local->loc.gfid, local->loc.parent->gfid);
-done:
- if (count == 0) {
- /* non-zero next_offset means that
- EOF is not yet hit on the current subvol
- */
- if (next_offset == 0) {
- next_subvol = dht_subvol_next (this, prev->this);
- } else {
- next_subvol = prev->this;
- }
+ refresh_local->stub = stub;
- if (!next_subvol) {
- goto unwind;
- }
+ refresh_local->refresh_layout_unlock = dht_refresh_parent_layout_resume;
+ refresh_local->refresh_layout_done = dht_refresh_parent_layout_done;
- if (conf->readdir_optimize == _gf_true) {
- if (next_subvol != local->first_up_subvol) {
- ret = dict_set_int32 (local->xattr,
- GF_READDIR_SKIP_DIRS, 1);
- if (ret)
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value"
- ":key = %s",
- GF_READDIR_SKIP_DIRS );
- } else {
- dict_del (local->xattr,
- GF_READDIR_SKIP_DIRS);
- }
- }
+ dht_refresh_layout(refresh_frame);
+ return 0;
+}
- STACK_WIND (frame, dht_readdirp_cbk,
- next_subvol, next_subvol->fops->readdirp,
- local->fd, local->size, next_offset,
- local->xattr);
- return 0;
- }
+static int32_t
+dht_call_mkdir_stub(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ call_stub_t *stub = NULL;
-unwind:
- if (op_ret < 0)
- op_ret = 0;
+ local = frame->local;
+ stub = local->stub;
+ local->stub = NULL;
- DHT_STACK_UNWIND (readdirp, frame, op_ret, op_errno, &entries, NULL);
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ } else {
+ local->op_ret = 0;
+ }
- gf_dirent_free (&entries);
+ call_resume(stub);
- return 0;
+ return 0;
}
-
-
-int
-dht_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, gf_dirent_t *orig_entries,
- dict_t *xdata)
+static int32_t
+dht_guard_parent_layout_and_namespace(xlator_t *subvol, call_stub_t *stub)
{
- dht_local_t *local = NULL;
- gf_dirent_t entries;
- gf_dirent_t *orig_entry = NULL;
- gf_dirent_t *entry = NULL;
- call_frame_t *prev = NULL;
- xlator_t *next_subvol = NULL;
- off_t next_offset = 0;
- int count = 0;
- dht_layout_t *layout = 0;
- xlator_t *subvol = 0;
- dht_conf_t *conf = NULL;
- dht_methods_t *methods = NULL;
-
- INIT_LIST_HEAD (&entries.list);
- prev = cookie;
- local = frame->local;
-
- conf = this->private;
- GF_VALIDATE_OR_GOTO (this->name, conf, done);
+ dht_local_t *local = NULL;
+ int ret = -1;
+ loc_t *loc = NULL;
+ xlator_t *hashed_subvol = NULL, *this = NULL;
+ ;
+ call_frame_t *frame = NULL;
+ char pgfid[GF_UUID_BUF_SIZE] = {0};
+ int32_t *parent_disk_layout = NULL;
+ dht_layout_t *parent_layout = NULL;
+ dht_conf_t *conf = NULL;
+
+ GF_VALIDATE_OR_GOTO("dht", stub, err);
+
+ frame = stub->frame;
+ this = frame->this;
+
+ conf = this->private;
+
+ local = frame->local;
+
+ local->stub = stub;
+
+ /* TODO: recheck whether we should lock on src or dst if we do similar
+ * stale layout checks for rename.
+ */
+ loc = &stub->args.loc;
+
+ gf_uuid_unparse(loc->parent->gfid, pgfid);
+
+ if (local->params == NULL) {
+ local->params = dict_new();
+ if (local->params == NULL) {
+ local->op_errno = ENOMEM;
+ gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "%s (%s/%s) (path: %s): "
+ "dict allocation failed",
+ gf_fop_list[stub->fop], pgfid, loc->name, loc->path);
+ goto err;
+ }
+ }
+
+ hashed_subvol = dht_subvol_get_hashed(this, loc);
+ if (hashed_subvol == NULL) {
+ local->op_errno = EINVAL;
+
+ gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "%s (%s/%s) (path: %s): "
+ "hashed subvolume not found",
+ gf_fop_list[stub->fop], pgfid, loc->name, loc->path);
+ goto err;
+ }
+
+ parent_layout = dht_layout_get(this, loc->parent);
+
+ ret = dht_disk_layout_extract_for_subvol(this, parent_layout, hashed_subvol,
+ &parent_disk_layout);
+ if (ret == -1) {
+ local->op_errno = EINVAL;
+ gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "%s (%s/%s) (path: %s): "
+ "extracting in-memory layout of parent failed. ",
+ gf_fop_list[stub->fop], pgfid, loc->name, loc->path);
+ goto err;
+ }
+
+ memcpy((void *)local->parent_disk_layout, (void *)parent_disk_layout,
+ sizeof(local->parent_disk_layout));
+
+ dht_layout_unref(this, parent_layout);
+ parent_layout = NULL;
+
+ ret = dict_set_str(local->params, GF_PREOP_PARENT_KEY, conf->xattr_name);
+ if (ret < 0) {
+ local->op_errno = -ret;
+ gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "%s (%s/%s) (path: %s): "
+ "setting %s key in params dictionary failed. ",
+ gf_fop_list[stub->fop], pgfid, loc->name, loc->path,
+ GF_PREOP_PARENT_KEY);
+ goto err;
+ }
+
+ ret = dict_set_bin(local->params, conf->xattr_name, parent_disk_layout,
+ 4 * 4);
+ if (ret < 0) {
+ local->op_errno = -ret;
+ gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "%s (%s/%s) (path: %s): "
+ "setting parent-layout in params dictionary failed. ",
+ gf_fop_list[stub->fop], pgfid, loc->name, loc->path);
+ goto err;
+ }
+
+ parent_disk_layout = NULL;
+ local->hashed_subvol = hashed_subvol;
+
+ local->current = &local->lock[0];
+ ret = dht_protect_namespace(frame, loc, hashed_subvol, &local->current->ns,
+ dht_call_mkdir_stub);
+ if (ret < 0)
+ goto err;
+
+ return 0;
+err:
- methods = conf->methods;
- GF_VALIDATE_OR_GOTO (this->name, conf->methods, done);
+ if (parent_disk_layout != NULL)
+ GF_FREE(parent_disk_layout);
- if (op_ret < 0)
- goto done;
+ if (parent_layout != NULL)
+ dht_layout_unref(this, parent_layout);
- if (!local->layout)
- local->layout = dht_layout_get (this, local->fd->inode);
+ return -1;
+}
- layout = local->layout;
+int
+dht_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *params)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ int i = 0;
+ int ret = 0;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+
+ conf = this->private;
+
+ dht_get_du_info(frame, this, loc);
+
+ local = dht_local_init(frame, loc, NULL, GF_FOP_MKNOD);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = dht_subvol_get_hashed(this, loc);
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no subvolume in layout for path=%s",
+ loc->path);
+ op_errno = EIO;
+ goto err;
+ }
+
+ /* Post remove-brick, the client layout may not be in sync with
+ * disk layout because of lack of lookup. Hence,a mknod call
+ * may fall on the decommissioned brick. Hence, if the
+ * hashed_subvol is part of decommissioned bricks list, do a
+ * lookup on parent dir. If a fix-layout is already done by the
+ * remove-brick process, the parent directory layout will be in
+ * sync with that of the disk. If fix-layout is still ending
+ * on the parent directory, we can let the file get created on
+ * the decommissioned brick which will be eventually migrated to
+ * non-decommissioned brick based on the new layout.
+ */
+
+ if (conf->decommission_subvols_cnt) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->decommissioned_bricks[i] &&
+ conf->decommissioned_bricks[i] == subvol) {
+ gf_msg_debug(this->name, 0,
+ "hashed subvol:%s is "
+ "part of decommission brick list for "
+ "file: %s",
+ subvol->name, loc->path);
+
+ /* dht_refresh_layout needs directory info in
+ * local->loc. Hence, storing the parent_loc in
+ * local->loc and storing the create context in
+ * local->loc2. We will restore this information
+ * in dht_creation do */
+
+ ret = loc_copy(&local->loc2, &local->loc);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
+ "loc_copy failed %s", loc->path);
- list_for_each_entry (orig_entry, (&orig_entries->list), list) {
- next_offset = orig_entry->d_off;
+ goto err;
+ }
- subvol = methods->layout_search (this, layout,
- orig_entry->d_name);
+ local->params = dict_ref(params);
+ local->rdev = rdev;
+ local->mode = mode;
+ local->umask = umask;
- if (!subvol || (subvol == prev->this)) {
- entry = gf_dirent_for_name (orig_entry->d_name);
- if (!entry) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_NO_MEMORY,
- "Memory allocation failed ");
- goto unwind;
- }
+ loc_wipe(&local->loc);
- entry->d_off = orig_entry->d_off;
- entry->d_ino = orig_entry->d_ino;
- entry->d_type = orig_entry->d_type;
- entry->d_len = orig_entry->d_len;
+ ret = dht_build_parent_loc(this, &local->loc, loc, &op_errno);
- list_add_tail (&entry->list, &entries.list);
- count++;
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_LOC_FAILED,
+ "parent loc build failed");
+ goto err;
}
- }
- op_ret = count;
- /* We need to ensure that only the last subvolume's end-of-directory
- * notification is respected so that directory reading does not stop
- * before all subvolumes have been read. That could happen because the
- * posix for each subvolume sends a ENOENT on end-of-directory but in
- * distribute we're not concerned only with a posix's view of the
- * directory but the aggregated namespace' view of the directory.
- */
- if (prev->this != dht_last_up_subvol (this))
- op_errno = 0;
-done:
- if (count == 0) {
- /* non-zero next_offset means that
- EOF is not yet hit on the current subvol
- */
- if (next_offset == 0) {
- next_subvol = dht_subvol_next (this, prev->this);
- } else {
- next_subvol = prev->this;
- }
+ ret = dht_mknod_lock(frame, subvol);
- if (!next_subvol) {
- goto unwind;
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INODE_LK_ERROR,
+ "locking parent failed");
+ goto err;
}
- STACK_WIND (frame, dht_readdir_cbk,
- next_subvol, next_subvol->fops->readdir,
- local->fd, local->size, next_offset, NULL);
- return 0;
+ goto done;
+ }
}
+ }
-unwind:
- if (op_ret < 0)
- op_ret = 0;
+ dht_mknod_wind_to_avail_subvol(frame, this, subvol, loc, rdev, mode, umask,
+ params);
- DHT_STACK_UNWIND (readdir, frame, op_ret, op_errno, &entries, NULL);
+done:
+ return 0;
- gf_dirent_free (&entries);
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
- return 0;
+ return 0;
}
-
int
-dht_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t yoff, int whichop, dict_t *dict)
+dht_symlink(call_frame_t *frame, xlator_t *this, const char *linkname,
+ loc_t *loc, mode_t umask, dict_t *params)
{
- dht_local_t *local = NULL;
- int op_errno = -1;
- xlator_t *xvol = NULL;
- int ret = 0;
- dht_conf_t *conf = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
- VALIDATE_OR_GOTO (this->private, err);
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
- conf = this->private;
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
- local = dht_local_init (frame, NULL, NULL, whichop);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- local->fd = fd_ref (fd);
- local->size = size;
- local->xattr_req = (dict)? dict_ref (dict) : NULL;
- local->first_up_subvol = dht_first_up_subvol (this);
-
- dht_deitransform (this, yoff, &xvol);
-
- /* TODO: do proper readdir */
- if (whichop == GF_FOP_READDIRP) {
- if (dict)
- local->xattr = dict_ref (dict);
- else
- local->xattr = dict_new ();
+ local = dht_local_init(frame, loc, NULL, GF_FOP_SYMLINK);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- if (local->xattr) {
- ret = dict_set_uint32 (local->xattr,
- conf->link_xattr_name, 256);
- if (ret)
- gf_msg (this->name, GF_LOG_WARNING, 0,
- DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value"
- " : key = %s",
- conf->link_xattr_name);
+ subvol = dht_subvol_get_hashed(this, loc);
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no subvolume in layout for path=%s",
+ loc->path);
+ op_errno = EIO;
+ goto err;
+ }
- if (conf->readdir_optimize == _gf_true) {
- if (xvol != local->first_up_subvol) {
- ret = dict_set_int32 (local->xattr,
- GF_READDIR_SKIP_DIRS, 1);
- if (ret)
- gf_msg (this->name,
- GF_LOG_ERROR, 0,
- DHT_MSG_DICT_SET_FAILED,
- "Failed to set "
- "dictionary value: "
- "key = %s",
- GF_READDIR_SKIP_DIRS);
- } else {
- dict_del (local->xattr,
- GF_READDIR_SKIP_DIRS);
- }
- }
- }
+ gf_msg_trace(this->name, 0, "creating %s on %s", loc->path, subvol->name);
- STACK_WIND (frame, dht_readdirp_cbk, xvol, xvol->fops->readdirp,
- fd, size, yoff, local->xattr);
- } else {
- STACK_WIND (frame, dht_readdir_cbk, xvol, xvol->fops->readdir,
- fd, size, yoff, local->xattr);
- }
+ STACK_WIND_COOKIE(frame, dht_newfile_cbk, (void *)subvol, subvol,
+ subvol->fops->symlink, linkname, loc, umask, params);
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (readdir, frame, -1, op_errno, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(link, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
- return 0;
+ return 0;
}
-
int
-dht_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t yoff, dict_t *xdata)
+dht_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
{
- int op = GF_FOP_READDIR;
- dht_conf_t *conf = NULL;
- int i = 0;
-
- conf = this->private;
- if (!conf)
- goto out;
+ xlator_t *cached_subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+
+ local = dht_local_init(frame, loc, NULL, GF_FOP_UNLINK);
+ if (!local) {
+ op_errno = ENOMEM;
+
+ goto err;
+ }
+
+ cached_subvol = local->cached_subvol;
+ if (!cached_subvol) {
+ gf_msg_debug(this->name, 0, "no cached subvolume for path=%s",
+ loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local->flags = xflag;
+ STACK_WIND_COOKIE(frame, dht_unlink_cbk, cached_subvol, cached_subvol,
+ cached_subvol->fops->unlink, loc, xflag, xdata);
+
+ return 0;
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(unlink, frame, -1, op_errno, NULL, NULL, NULL);
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (!conf->subvolume_status[i]) {
- op = GF_FOP_READDIRP;
- break;
- }
- }
+ return 0;
+}
- if (conf->use_readdirp)
- op = GF_FOP_READDIRP;
+static int
+dht_remove_stale_linkto_cbk(int ret, call_frame_t *sync_frame, void *data)
+{
+ DHT_STACK_DESTROY(sync_frame);
+ return 0;
+}
+static int
+dht_remove_stale_linkto(void *data)
+{
+ call_frame_t *frame = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *this = NULL;
+ dict_t *xdata_in = NULL;
+ int ret = 0;
+
+ GF_VALIDATE_OR_GOTO("dht", data, out);
+
+ frame = data;
+ local = frame->local;
+ this = frame->this;
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO("dht", local, out);
+ GF_VALIDATE_OR_GOTO("dht", local->link_subvol, out);
+
+ xdata_in = dict_new();
+ if (!xdata_in)
+ goto out;
+
+ ret = dht_fill_dict_to_avoid_unlink_of_migrating_file(xdata_in);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, 0,
+ "Failed to set keys for stale linkto"
+ "deletion on path %s",
+ local->loc.path);
+ goto out;
+ }
+
+ ret = syncop_unlink(local->link_subvol, &local->loc, xdata_in, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, 0,
+ "Removal of linkto failed"
+ " on path %s at subvol %s",
+ local->loc.path, local->link_subvol->name);
+ }
out:
- dht_do_readdir (frame, this, fd, size, yoff, op, 0);
- return 0;
+ if (xdata_in)
+ dict_unref(xdata_in);
+ return ret;
}
-int
-dht_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t yoff, dict_t *dict)
+static int
+dht_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
- dht_do_readdir (frame, this, fd, size, yoff, GF_FOP_READDIRP, dict);
- return 0;
-}
+ dht_local_t *local = NULL;
+ int ret = -1;
+ gf_boolean_t stbuf_merged = _gf_false;
+ xlator_t *subvol = NULL;
+ call_frame_t *cleanup_frame = NULL;
+ dht_local_t *cleanup_local = NULL;
+
+ local = frame->local;
+
+ if (op_ret == -1) {
+ /* Remove the linkto if exists */
+ if (local->linked) {
+ cleanup_frame = create_frame(this, this->ctx->pool);
+ if (cleanup_frame) {
+ cleanup_local = dht_local_init(cleanup_frame, &local->loc2,
+ NULL, 0);
+ if (!cleanup_local || !local->link_subvol) {
+ DHT_STACK_DESTROY(cleanup_frame);
+ goto out;
+ }
+ cleanup_local->link_subvol = local->link_subvol;
+ FRAME_SU_DO(cleanup_frame, dht_local_t);
+ ret = synctask_new(this->ctx->env, dht_remove_stale_linkto,
+ dht_remove_stale_linkto_cbk, cleanup_frame,
+ cleanup_frame);
+ }
+ }
+ /* No continuation on DHT inode missing errors, as we should
+ * then have a good stbuf that states P2 happened. We would
+ * get inode missing if, the file completed migrated between
+ * the lookup and the link call */
+ goto out;
+ }
+
+ /* Update parent on success, even if P1/2 checks are positive.
+ * The second call on success will further update the parent */
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update(local->loc.parent, this, preparent, 0);
+ dht_inode_ctx_time_update(local->loc.parent, this, postparent, 1);
+ }
+
+ /* Update linkto attrs, if this is the first call and non-P2,
+ * if we detect P2 then we need to trust the attrs from the
+ * second call, not the first */
+ if (local->linked == _gf_true &&
+ ((local->call_cnt == 1 && !IS_DHT_MIGRATION_PHASE2(stbuf)) ||
+ (local->call_cnt != 1 && IS_DHT_MIGRATION_PHASE2(&local->stbuf)))) {
+ dht_iatt_merge(this, &local->stbuf, stbuf);
+ stbuf_merged = _gf_true;
+ dht_linkfile_attr_heal(frame, this);
+ }
+
+ /* No further P1/2 checks if we are in the second iteration of
+ * the call */
+ if (local->call_cnt != 1) {
+ goto out;
+ } else {
+ /* Preserve the return values, in case the migration decides
+ * to recreate the link on the same subvol that the current
+ * hased for the link was created on. */
+ dht_iatt_merge(this, &local->preparent, preparent);
+ dht_iatt_merge(this, &local->postparent, postparent);
+ if (!stbuf_merged) {
+ dht_iatt_merge(this, &local->stbuf, stbuf);
+ stbuf_merged = _gf_true;
+ }
+
+ local->inode = inode_ref(inode);
+ }
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ local->rebalance.target_op_fn = dht_link2;
+ dht_set_local_rebalance(this, local, stbuf, preparent, postparent, xdata);
+
+ /* Check if the rebalance phase2 is true */
+ if (IS_DHT_MIGRATION_PHASE2(stbuf)) {
+ ret = dht_inode_ctx_get_mig_info(this, local->loc.inode, NULL, &subvol);
+ if (!subvol) {
+ /* Phase 2 of migration */
+ ret = dht_rebalance_complete_check(this, frame);
+ if (!ret)
+ return 0;
+ } else {
+ dht_link2(this, subvol, frame, 0);
+ return 0;
+ }
+ }
+
+ /* Check if the rebalance phase1 is true */
+ if (IS_DHT_MIGRATION_PHASE1(stbuf)) {
+ ret = dht_inode_ctx_get_mig_info(this, local->loc.inode, NULL, &subvol);
+ if (subvol) {
+ dht_link2(this, subvol, frame, 0);
+ return 0;
+ }
+ ret = dht_rebalance_in_progress_check(this, frame);
+ if (!ret)
+ return 0;
+ }
+out:
+ DHT_STRIP_PHASE1_FLAGS(stbuf);
+ dht_set_fixed_dir_stat(preparent);
+ dht_set_fixed_dir_stat(postparent);
+ DHT_STACK_UNWIND(link, frame, op_ret, op_errno, inode, stbuf, preparent,
+ postparent, NULL);
+ return 0;
+}
-int
-dht_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xdata)
+static int
+dht_link2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
+ dht_local_t *local = NULL;
+ int op_errno = EINVAL;
+ local = frame->local;
+ if (!local)
+ goto err;
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1)
- local->op_errno = op_errno;
+ op_errno = local->op_errno;
- if (op_ret == 0)
- local->op_ret = 0;
- }
- UNLOCK (&frame->lock);
+ if (we_are_not_migrating(ret)) {
+ /* This dht xlator is not migrating the file. Unwind and
+ * pass on the original mode bits so the higher DHT layer
+ * can handle this.
+ */
+ dht_set_fixed_dir_stat(&local->preparent);
+ dht_set_fixed_dir_stat(&local->postparent);
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt))
- DHT_STACK_UNWIND (fsyncdir, frame, local->op_ret,
- local->op_errno, xdata);
+ DHT_STACK_UNWIND(link, frame, local->op_ret, op_errno, local->inode,
+ &local->stbuf, &local->preparent, &local->postparent,
+ NULL);
+ return 0;
+ }
+
+ if (subvol == NULL) {
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ /* Second call to create link file could result in EEXIST as the
+ * first call created the linkto in the currently
+ * migrating subvol, which could be the new hashed subvol */
+ if (local->link_subvol == subvol) {
+ DHT_STRIP_PHASE1_FLAGS(&local->stbuf);
+ dht_set_fixed_dir_stat(&local->preparent);
+ dht_set_fixed_dir_stat(&local->postparent);
+ DHT_STACK_UNWIND(link, frame, 0, 0, local->inode, &local->stbuf,
+ &local->preparent, &local->postparent, NULL);
return 0;
-}
+ }
+ local->call_cnt = 2;
-int
-dht_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int datasync, dict_t *xdata)
-{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- int op_errno = -1;
- int i = -1;
+ STACK_WIND(frame, dht_link_cbk, subvol, subvol->fops->link, &local->loc,
+ &local->loc2, local->xattr_req);
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
- VALIDATE_OR_GOTO (this->private, err);
+ return 0;
+err:
+ DHT_STACK_UNWIND(link, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
- conf = this->private;
+ return 0;
+}
- local = dht_local_init (frame, NULL, NULL, GF_FOP_FSYNCDIR);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+static int
+dht_link_linkfile_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ xlator_t *srcvol = NULL;
- local->fd = fd_ref (fd);
- local->call_cnt = conf->subvolume_cnt;
+ if (op_ret == -1)
+ goto err;
- for (i = 0; i < conf->subvolume_cnt; i++) {
- STACK_WIND (frame, dht_fsyncdir_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->fsyncdir,
- fd, datasync, xdata);
- }
+ local = frame->local;
+ srcvol = local->linkfile.srcvol;
- return 0;
+ STACK_WIND(frame, dht_link_cbk, srcvol, srcvol->fops->link, &local->loc,
+ &local->loc2, local->xattr_req);
+
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (fsyncdir, frame, -1, op_errno, NULL);
+ DHT_STRIP_PHASE1_FLAGS(stbuf);
+ dht_set_fixed_dir_stat(preparent);
+ dht_set_fixed_dir_stat(postparent);
+ DHT_STACK_UNWIND(link, frame, op_ret, op_errno, inode, stbuf, preparent,
+ postparent, xdata);
- return 0;
+ return 0;
}
-
int
-dht_newfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- inode_t *inode, struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+dht_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
{
- xlator_t *prev = NULL;
- int ret = -1;
- dht_local_t *local = NULL;
+ xlator_t *cached_subvol = NULL;
+ xlator_t *hashed_subvol = NULL;
+ int op_errno = -1;
+ int ret = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(oldloc, err);
+ VALIDATE_OR_GOTO(newloc, err);
+
+ local = dht_local_init(frame, oldloc, NULL, GF_FOP_LINK);
+ if (!local) {
+ op_errno = ENOMEM;
+
+ goto err;
+ }
+ local->call_cnt = 1;
+
+ cached_subvol = local->cached_subvol;
+ if (!cached_subvol) {
+ gf_msg_debug(this->name, 0, "no cached subvolume for path=%s",
+ oldloc->path);
+ op_errno = ENOENT;
+ goto err;
+ }
+
+ hashed_subvol = dht_subvol_get_hashed(this, newloc);
+ if (!hashed_subvol) {
+ gf_msg_debug(this->name, 0, "no subvolume in layout for path=%s",
+ newloc->path);
+ op_errno = EIO;
+ goto err;
+ }
+
+ ret = loc_copy(&local->loc2, newloc);
+ if (ret == -1) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
+
+ if (hashed_subvol != cached_subvol) {
+ gf_uuid_copy(local->gfid, oldloc->inode->gfid);
+ dht_linkfile_create(frame, dht_link_linkfile_cbk, this, cached_subvol,
+ hashed_subvol, newloc);
+ } else {
+ STACK_WIND(frame, dht_link_cbk, cached_subvol,
+ cached_subvol->fops->link, oldloc, newloc, xdata);
+ }
+
+ return 0;
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(link, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
- if (op_ret == -1)
- goto out;
+ return 0;
+}
- local = frame->local;
- if (!local) {
- op_ret = -1;
- op_errno = EINVAL;
+int
+dht_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, fd_t *fd, inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+ xlator_t *prev = NULL;
+ int ret = -1;
+ dht_local_t *local = NULL;
+ gf_boolean_t parent_layout_changed = _gf_false;
+ char pgfid[GF_UUID_BUF_SIZE] = {0};
+ xlator_t *subvol = NULL;
+
+ local = frame->local;
+
+ local = frame->local;
+ if (!local) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ parent_layout_changed = (xdata &&
+ dict_get(xdata, GF_PREOP_CHECK_FAILED))
+ ? _gf_true
+ : _gf_false;
+
+ if (parent_layout_changed) {
+ if (local && local->lock[0].layout.parent_layout.locks) {
+ /* Returning failure as the layout could not be fixed even under
+ * the lock */
goto out;
- }
+ }
- prev = cookie;
+ gf_uuid_unparse(local->loc.parent->gfid, pgfid);
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "create (%s/%s) (path: %s): parent layout "
+ "changed. Attempting a layout refresh and then a "
+ "retry",
+ pgfid, local->loc.name, local->loc.path);
- if (local->loc.parent) {
+ /*
+ dht_refresh_layout needs directory info in local->loc.Hence,
+ storing the parent_loc in local->loc and storing the create
+ context in local->loc2. We will restore this information in
+ dht_creation_do.
+ */
- dht_inode_ctx_time_update (local->loc.parent, this,
- preparent, 0);
- dht_inode_ctx_time_update (local->loc.parent, this,
- postparent, 1);
- }
+ loc_wipe(&local->loc2);
+
+ ret = loc_copy(&local->loc2, &local->loc);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
+ "loc_copy failed %s", local->loc.path);
- ret = dht_layout_preset (this, prev, inode);
- if (ret < 0) {
- gf_msg_debug (this->name, 0,
- "could not set pre-set layout for subvolume %s",
- prev? prev->name: NULL);
- op_ret = -1;
- op_errno = EINVAL;
goto out;
- }
- if (local->linked == _gf_true)
- dht_linkfile_attr_heal (frame, this);
-out:
- /*
- * FIXME: ia_size and st_blocks of preparent and postparent do not have
- * correct values. since, preparent and postparent buffers correspond
- * to a directory these two members should have values equal to sum of
- * corresponding values from each of the subvolume.
- * See dht_iatt_merge for reference.
- */
- DHT_STRIP_PHASE1_FLAGS (stbuf);
- DHT_STACK_UNWIND (mknod, frame, op_ret, op_errno, inode, stbuf,
- preparent, postparent, xdata);
- return 0;
-}
+ }
-int
-dht_mknod_linkfile_create_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
-{
- dht_local_t *local = NULL;
- xlator_t *cached_subvol = NULL;
+ loc_wipe(&local->loc);
- if (op_ret == -1)
- goto err;
+ ret = dht_build_parent_loc(this, &local->loc, &local->loc2,
+ &op_errno);
- local = frame->local;
- if (!local || !local->cached_subvol) {
- op_errno = EINVAL;
- goto err;
- }
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_LOC_FAILED,
+ "parent loc build failed");
+ goto out;
+ }
- cached_subvol = local->cached_subvol;
+ subvol = dht_subvol_get_hashed(this, &local->loc2);
- STACK_WIND_COOKIE (frame, dht_newfile_cbk, (void *)cached_subvol,
- cached_subvol, cached_subvol->fops->mknod,
- &local->loc, local->mode, local->rdev, local->umask,
- local->params);
+ ret = dht_create_lock(frame, subvol);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INODE_LK_ERROR,
+ "locking parent failed");
+ goto out;
+ }
- return 0;
-err:
- DHT_STACK_UNWIND (mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL,
- NULL);
- return 0;
-}
+ return 0;
+ }
-int
-dht_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t rdev, mode_t umask, dict_t *params)
-{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- xlator_t *avail_subvol = NULL;
- dht_local_t *local = NULL;
+ goto out;
+ }
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
+ prev = cookie;
- dht_get_du_info (frame, this, loc);
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update(local->loc.parent, this, preparent, 0);
- local = dht_local_init (frame, loc, NULL, GF_FOP_MKNOD);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ dht_inode_ctx_time_update(local->loc.parent, this, postparent, 1);
+ }
- subvol = dht_subvol_get_hashed (this, loc);
- if (!subvol) {
- gf_msg_debug (this->name, 0,
- "no subvolume in layout for path=%s",
- loc->path);
- op_errno = ENOENT;
- goto err;
- }
+ ret = dht_fd_ctx_set(this, fd, prev);
+ if (ret != 0) {
+ gf_msg_debug(this->name, 0,
+ "Possible fd leak. "
+ "Could not set fd ctx for subvol %s",
+ prev->name);
+ }
- if (!dht_is_subvol_filled (this, subvol)) {
- gf_msg_trace (this->name, 0,
- "creating %s on %s", loc->path,
- subvol->name);
+ ret = dht_layout_preset(this, prev, inode);
+ if (ret != 0) {
+ gf_msg_debug(this->name, 0, "could not set preset layout for subvol %s",
+ prev->name);
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
- STACK_WIND_COOKIE (frame, dht_newfile_cbk, (void *)subvol,
- subvol, subvol->fops->mknod, loc, mode,
- rdev, umask, params);
- } else {
+ local->op_errno = op_errno;
- avail_subvol = dht_free_disk_available_subvol (this, subvol,
- local);
- if (avail_subvol != subvol) {
- /* Choose the minimum filled volume, and create the
- files there */
-
- local->params = dict_ref (params);
- local->cached_subvol = avail_subvol;
- local->mode = mode;
- local->rdev = rdev;
- local->umask = umask;
- dht_linkfile_create (frame,
- dht_mknod_linkfile_create_cbk,
- this, avail_subvol, subvol, loc);
- } else {
- gf_msg_trace (this->name, 0,
- "creating %s on %s", loc->path,
- subvol->name);
-
- STACK_WIND_COOKIE (frame, dht_newfile_cbk,
- (void *)subvol, subvol,
- subvol->fops->mknod, loc, mode,
- rdev, umask, params);
- }
- }
+ if (local->linked == _gf_true) {
+ local->stbuf = *stbuf;
+ dht_linkfile_attr_heal(frame, this);
+ }
+out:
- return 0;
+ DHT_STRIP_PHASE1_FLAGS(stbuf);
+ dht_set_fixed_dir_stat(preparent);
+ dht_set_fixed_dir_stat(postparent);
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (mknod, frame, -1, op_errno,
- NULL, NULL, NULL, NULL, NULL);
+ if (local && local->lock[0].layout.parent_layout.locks) {
+ /* store op_errno for failure case*/
+ local->op_errno = op_errno;
+ local->refresh_layout_unlock(frame, this, op_ret, 1);
- return 0;
+ if (op_ret == 0) {
+ DHT_STACK_UNWIND(create, frame, op_ret, op_errno, fd, inode, stbuf,
+ preparent, postparent, xdata);
+ }
+ } else {
+ DHT_STACK_UNWIND(create, frame, op_ret, op_errno, fd, inode, stbuf,
+ preparent, postparent, xdata);
+ }
+ return 0;
}
-
-int
-dht_symlink (call_frame_t *frame, xlator_t *this,
- const char *linkname, loc_t *loc, mode_t umask, dict_t *params)
+static int
+dht_create_linkfile_create_cbk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *cached_subvol = NULL;
+ dht_conf_t *conf = NULL;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
+ local = frame->local;
+ if (!local) {
+ op_errno = EINVAL;
+ goto err;
+ }
- local = dht_local_init (frame, loc, NULL, GF_FOP_SYMLINK);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ goto err;
+ }
- subvol = dht_subvol_get_hashed (this, loc);
- if (!subvol) {
- gf_msg_debug (this->name, 0,
- "no subvolume in layout for path=%s",
- loc->path);
- op_errno = ENOENT;
- goto err;
- }
+ conf = this->private;
+ if (!conf) {
+ local->op_errno = EINVAL;
+ op_errno = EINVAL;
+ goto err;
+ }
- gf_msg_trace (this->name, 0,
- "creating %s on %s", loc->path, subvol->name);
+ cached_subvol = local->cached_subvol;
- STACK_WIND_COOKIE (frame, dht_newfile_cbk, (void *)subvol, subvol,
- subvol->fops->symlink, linkname, loc, umask,
- params);
+ if (local->params) {
+ dict_del(local->params, conf->link_xattr_name);
+ dict_del(local->params, GLUSTERFS_INTERNAL_FOP_KEY);
+ }
- return 0;
+ STACK_WIND_COOKIE(frame, dht_create_cbk, cached_subvol, cached_subvol,
+ cached_subvol->fops->create, &local->loc, local->flags,
+ local->mode, local->umask, local->fd, local->params);
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (link, frame, -1, op_errno,
- NULL, NULL, NULL, NULL, NULL);
-
- return 0;
+ if (local && local->lock[0].layout.parent_layout.locks) {
+ local->refresh_layout_unlock(frame, this, -1, 1);
+ } else {
+ DHT_STACK_UNWIND(create, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL, NULL);
+ }
+ return 0;
}
-
-int
-dht_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
- dict_t *xdata)
+static int
+dht_create_wind_to_avail_subvol(call_frame_t *frame, xlator_t *this,
+ xlator_t *subvol, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd,
+ dict_t *params)
{
- xlator_t *cached_subvol = NULL;
- xlator_t *hashed_subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *avail_subvol = NULL;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
+ local = frame->local;
- local = dht_local_init (frame, loc, NULL, GF_FOP_UNLINK);
- if (!local) {
- op_errno = ENOMEM;
+ if (!dht_is_subvol_filled(this, subvol)) {
+ gf_msg_debug(this->name, 0, "creating %s on %s", loc->path,
+ subvol->name);
- goto err;
- }
+ dht_set_parent_layout_in_dict(loc, this, local);
- hashed_subvol = dht_subvol_get_hashed (this, loc);
- /* Dont fail unlink if hashed_subvol is NULL which can be the result
- * of layout anomaly */
- if (!hashed_subvol) {
- gf_msg_debug (this->name, 0,
- "no subvolume in layout for path=%s",
- loc->path);
- }
+ STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol,
+ subvol->fops->create, loc, flags, mode, umask, fd,
+ params);
- cached_subvol = local->cached_subvol;
- if (!cached_subvol) {
- gf_msg_debug (this->name, 0,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ } else {
+ avail_subvol = dht_free_disk_available_subvol(this, subvol, local);
- local->flags = xflag;
- if (hashed_subvol && hashed_subvol != cached_subvol) {
- STACK_WIND (frame, dht_unlink_linkfile_cbk,
- hashed_subvol, hashed_subvol->fops->unlink, loc,
- xflag, xdata);
- } else {
- STACK_WIND (frame, dht_unlink_cbk,
- cached_subvol, cached_subvol->fops->unlink, loc,
- xflag, xdata);
+ if (avail_subvol != subvol) {
+ local->cached_subvol = avail_subvol;
+ local->hashed_subvol = subvol;
+
+ gf_msg_debug(this->name, 0, "creating %s on %s (link at %s)",
+ loc->path, avail_subvol->name, subvol->name);
+
+ dht_linkfile_create(frame, dht_create_linkfile_create_cbk, this,
+ avail_subvol, subvol, loc);
+
+ goto out;
}
- return 0;
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL, NULL);
+ gf_msg_debug(this->name, 0, "creating %s on %s", loc->path,
+ subvol->name);
- return 0;
+ dht_set_parent_layout_in_dict(loc, this, local);
+
+ STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol,
+ subvol->fops->create, loc, flags, mode, umask, fd,
+ params);
+ }
+out:
+ return 0;
}
int
-dht_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- inode_t *inode, struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+dht_build_parent_loc(xlator_t *this, loc_t *parent, loc_t *child,
+ int32_t *op_errno)
{
- dht_local_t *local = NULL;
- int ret = -1;
- gf_boolean_t stbuf_merged = _gf_false;
- xlator_t *subvol = NULL;
+ inode_table_t *table = NULL;
+ int ret = -1;
- local = frame->local;
+ if (!parent || !child) {
+ if (op_errno)
+ *op_errno = EINVAL;
+ goto out;
+ }
- if (op_ret == -1) {
- /* No continuation on DHT inode missing errors, as we should
- * then have a good stbuf that states P2 happened. We would
- * get inode missing if, the file completed migrated between
- * the lookup and the link call */
- goto out;
+ if (child->parent) {
+ parent->inode = inode_ref(child->parent);
+ if (!parent->inode) {
+ if (op_errno)
+ *op_errno = EINVAL;
+ goto out;
}
- /* Update parent on success, even if P1/2 checks are positve.
- * The second call on success will further update the parent */
- if (local->loc.parent) {
- dht_inode_ctx_time_update (local->loc.parent, this,
- preparent, 0);
- dht_inode_ctx_time_update (local->loc.parent, this,
- postparent, 1);
- }
-
- /* Update linkto attrs, if this is the first call and non-P2,
- * if we detect P2 then we need to trust the attrs from the
- * second call, not the first */
- if (local->linked == _gf_true &&
- ((local->call_cnt == 1 && !IS_DHT_MIGRATION_PHASE2 (stbuf))
- || (local->call_cnt != 1 &&
- IS_DHT_MIGRATION_PHASE2 (&local->stbuf)))) {
- dht_iatt_merge (this, &local->stbuf, stbuf, NULL);
- stbuf_merged = _gf_true;
- dht_linkfile_attr_heal (frame, this);
- }
-
- /* No further P1/2 checks if we are in the second iteration of
- * the call */
- if (local->call_cnt != 1) {
- goto out;
- } else {
- /* Preserve the return values, in case the migration decides
- * to recreate the link on the same subvol that the current
- * hased for the link was created on. */
- dht_iatt_merge (this, &local->preparent,
- preparent, NULL);
- dht_iatt_merge (this, &local->postparent,
- postparent, NULL);
- if (!stbuf_merged) {
- dht_iatt_merge (this, &local->stbuf,
- stbuf, NULL);
- stbuf_merged = _gf_true;
- }
+ gf_uuid_copy(parent->gfid, child->pargfid);
+
+ ret = 0;
- local->inode = inode_ref (inode);
+ goto out;
+ } else {
+ if (gf_uuid_is_null(child->pargfid)) {
+ if (op_errno)
+ *op_errno = EINVAL;
+ goto out;
}
- local->op_errno = op_errno;
- local->rebalance.target_op_fn = dht_link2;
- /* Check if the rebalance phase2 is true */
- if (IS_DHT_MIGRATION_PHASE2 (stbuf)) {
- ret = dht_inode_ctx_get1 (this, local->loc.inode, &subvol);
- if (!subvol) {
- /* Phase 2 of migration */
- ret = dht_rebalance_complete_check (this, frame);
- if (!ret)
- return 0;
- } else {
- dht_link2 (this, frame, 0);
- return 0;
- }
+ table = this->itable;
+
+ if (!table) {
+ if (op_errno) {
+ *op_errno = EINVAL;
+ goto out;
+ }
}
- /* Check if the rebalance phase1 is true */
- if (IS_DHT_MIGRATION_PHASE1 (stbuf)) {
- ret = dht_inode_ctx_get1 (this, local->loc.inode, &subvol);
- if (subvol) {
- dht_link2 (this, frame, 0);
- return 0;
- }
- ret = dht_rebalance_in_progress_check (this, frame);
- if (!ret)
- return 0;
+ parent->inode = inode_find(table, child->pargfid);
+
+ if (!parent->inode) {
+ if (op_errno) {
+ *op_errno = EINVAL;
+ goto out;
+ }
}
-out:
- DHT_STRIP_PHASE1_FLAGS (stbuf);
- DHT_STACK_UNWIND (link, frame, op_ret, op_errno, inode, stbuf,
- preparent, postparent, NULL);
+ gf_uuid_copy(parent->gfid, child->pargfid);
- return 0;
-}
+ ret = 0;
+ }
+out:
+ return ret;
+}
-int
-dht_link2 (xlator_t *this, call_frame_t *frame, int op_ret)
+static int32_t
+dht_create_do(call_frame_t *frame)
{
- dht_local_t *local = NULL;
- xlator_t *subvol = NULL;
- int op_errno = EINVAL;
+ dht_local_t *local = NULL;
+ dht_layout_t *refreshed = NULL;
+ xlator_t *subvol = NULL;
+ xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+ dht_methods_t *methods = NULL;
- local = frame->local;
- if (!local)
- goto err;
+ local = frame->local;
- op_errno = local->op_errno;
- if (op_ret == -1)
- goto err;
+ this = THIS;
- dht_inode_ctx_get1 (this, local->loc.inode, &subvol);
- if (!subvol) {
- subvol = local->cached_subvol;
- if (!subvol) {
- op_errno = EINVAL;
- goto err;
- }
- }
+ conf = this->private;
- /* Second call to create link file could result in EEXIST as the
- * first call created the linkto in the currently
- * migrating subvol, which could be the new hashed subvol */
- if (local->link_subvol == subvol) {
- DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
- DHT_STACK_UNWIND (link, frame, 0, 0, local->inode,
- &local->stbuf, &local->preparent,
- &local->postparent, NULL);
+ GF_VALIDATE_OR_GOTO(this->name, conf, err);
- return 0;
- }
+ methods = &(conf->methods);
- local->call_cnt = 2;
+ /* We don't need parent_loc anymore */
+ loc_wipe(&local->loc);
- STACK_WIND (frame, dht_link_cbk, subvol, subvol->fops->link,
- &local->loc, &local->loc2, NULL);
+ loc_copy(&local->loc, &local->loc2);
- return 0;
+ loc_wipe(&local->loc2);
+
+ refreshed = local->selfheal.refreshed_layout;
+
+ subvol = methods->layout_search(this, refreshed, local->loc.name);
+
+ if (!subvol) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ "no subvolume in "
+ "layout for path=%s",
+ local->loc.path);
+ local->op_errno = ENOENT;
+ goto err;
+ }
+
+ dht_create_wind_to_avail_subvol(frame, this, subvol, &local->loc,
+ local->flags, local->mode, local->umask,
+ local->fd, local->params);
+ return 0;
err:
- DHT_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL, NULL,
- NULL, NULL);
+ local->refresh_layout_unlock(frame, this, -1, 1);
- return 0;
+ return 0;
}
-int
-dht_link_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
+static int32_t
+dht_create_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- dht_local_t *local = NULL;
- xlator_t *srcvol = NULL;
-
- if (op_ret == -1)
- goto err;
+ DHT_STACK_DESTROY(frame);
+ return 0;
+}
- local = frame->local;
- srcvol = local->linkfile.srcvol;
+static int32_t
+dht_create_finish(call_frame_t *frame, xlator_t *this, int op_ret,
+ int invoke_cbk)
+{
+ dht_local_t *local = NULL, *lock_local = NULL;
+ call_frame_t *lock_frame = NULL;
+ int lock_count = 0;
+
+ local = frame->local;
+ lock_count = dht_lock_count(local->lock[0].layout.parent_layout.locks,
+ local->lock[0].layout.parent_layout.lk_count);
+ if (lock_count == 0)
+ goto done;
+
+ lock_frame = copy_frame(frame);
+ if (lock_frame == NULL) {
+ goto done;
+ }
+
+ lock_local = dht_local_init(lock_frame, &local->loc, NULL,
+ lock_frame->root->op);
+ if (lock_local == NULL) {
+ goto done;
+ }
+
+ lock_local->lock[0]
+ .layout.parent_layout.locks = local->lock[0].layout.parent_layout.locks;
+ lock_local->lock[0].layout.parent_layout.lk_count =
+ local->lock[0].layout.parent_layout.lk_count;
+
+ local->lock[0].layout.parent_layout.locks = NULL;
+ local->lock[0].layout.parent_layout.lk_count = 0;
+
+ dht_unlock_inodelk(lock_frame,
+ lock_local->lock[0].layout.parent_layout.locks,
+ lock_local->lock[0].layout.parent_layout.lk_count,
+ dht_create_unlock_cbk);
+ lock_frame = NULL;
- STACK_WIND (frame, dht_link_cbk, srcvol, srcvol->fops->link,
- &local->loc, &local->loc2, xdata);
+done:
+ if (lock_frame != NULL) {
+ DHT_STACK_DESTROY(lock_frame);
+ }
+ if (op_ret == 0)
return 0;
-err:
- DHT_STRIP_PHASE1_FLAGS (stbuf);
- DHT_STACK_UNWIND (link, frame, op_ret, op_errno, inode, stbuf, preparent,
- postparent, NULL);
-
- return 0;
+ DHT_STACK_UNWIND(create, frame, op_ret, local->op_errno, NULL, NULL, NULL,
+ NULL, NULL, NULL);
+ return 0;
}
-
-int
-dht_link (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+static int32_t
+dht_create_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- xlator_t *cached_subvol = NULL;
- xlator_t *hashed_subvol = NULL;
- int op_errno = -1;
- int ret = -1;
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (oldloc, err);
- VALIDATE_OR_GOTO (newloc, err);
+ local = frame->local;
- local = dht_local_init (frame, oldloc, NULL, GF_FOP_LINK);
- if (!local) {
- op_errno = ENOMEM;
+ if (!local) {
+ goto err;
+ }
- goto err;
- }
- local->call_cnt = 1;
+ if (op_ret < 0) {
+ gf_msg("DHT", GF_LOG_ERROR, 0, DHT_MSG_INODE_LK_ERROR,
+ "Create lock failed for file: %s", local->loc2.name);
- cached_subvol = local->cached_subvol;
- if (!cached_subvol) {
- gf_msg_debug (this->name, 0,
- "no cached subvolume for path=%s", oldloc->path);
- op_errno = ENOENT;
- goto err;
- }
+ local->op_errno = op_errno;
- hashed_subvol = dht_subvol_get_hashed (this, newloc);
- if (!hashed_subvol) {
- gf_msg_debug (this->name, 0,
- "no subvolume in layout for path=%s",
- newloc->path);
- op_errno = ENOENT;
- goto err;
- }
+ goto err;
+ }
- ret = loc_copy (&local->loc2, newloc);
- if (ret == -1) {
- op_errno = ENOMEM;
- goto err;
- }
+ local->refresh_layout_unlock = dht_create_finish;
- if (hashed_subvol != cached_subvol) {
- gf_uuid_copy (local->gfid, oldloc->inode->gfid);
- dht_linkfile_create (frame, dht_link_linkfile_cbk, this,
- cached_subvol, hashed_subvol, newloc);
- } else {
- STACK_WIND (frame, dht_link_cbk,
- cached_subvol, cached_subvol->fops->link,
- oldloc, newloc, xdata);
- }
+ local->refresh_layout_done = dht_create_do;
- return 0;
+ dht_refresh_layout(frame);
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
-
- return 0;
+ if (local)
+ dht_create_finish(frame, this, -1, 0);
+ else
+ DHT_STACK_UNWIND(create, frame, -1, EINVAL, NULL, NULL, NULL, NULL,
+ NULL, NULL);
+ return 0;
}
-
-int
-dht_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- fd_t *fd, inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+int32_t
+dht_create_lock(call_frame_t *frame, xlator_t *subvol)
{
- call_frame_t *prev = NULL;
- int ret = -1;
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
+ int count = 1, ret = -1;
+ dht_lock_t **lk_array = NULL;
- if (op_ret == -1)
- goto out;
+ GF_VALIDATE_OR_GOTO("dht", frame, err);
+ GF_VALIDATE_OR_GOTO(frame->this->name, frame->local, err);
- local = frame->local;
- if (!local) {
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
+ local = frame->local;
- prev = cookie;
+ lk_array = GF_CALLOC(count, sizeof(*lk_array), gf_common_mt_pointer);
- if (local->loc.parent) {
- dht_inode_ctx_time_update (local->loc.parent, this,
- preparent, 0);
+ if (lk_array == NULL)
+ goto err;
- dht_inode_ctx_time_update (local->loc.parent, this,
- postparent, 1);
- }
+ lk_array[0] = dht_lock_new(frame->this, subvol, &local->loc, F_RDLCK,
+ DHT_LAYOUT_HEAL_DOMAIN, NULL,
+ IGNORE_ENOENT_ESTALE);
- ret = dht_layout_preset (this, prev->this, inode);
- if (ret != 0) {
- gf_msg_debug (this->name, 0,
- "could not set preset layout for subvol %s",
- prev->this->name);
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
- if (local->linked == _gf_true) {
- local->stbuf = *stbuf;
- dht_linkfile_attr_heal (frame, this);
- }
-out:
- DHT_STRIP_PHASE1_FLAGS (stbuf);
- DHT_STACK_UNWIND (create, frame, op_ret, op_errno, fd, inode, stbuf, preparent,
- postparent, xdata);
- return 0;
-}
+ if (lk_array[0] == NULL)
+ goto err;
+ local->lock[0].layout.parent_layout.locks = lk_array;
+ local->lock[0].layout.parent_layout.lk_count = count;
-int
-dht_create_linkfile_create_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
-{
- dht_local_t *local = NULL;
- xlator_t *cached_subvol = NULL;
+ ret = dht_blocking_inodelk(frame, lk_array, count, dht_create_lock_cbk);
- if (op_ret == -1)
- goto err;
+ if (ret < 0) {
+ local->lock[0].layout.parent_layout.locks = NULL;
+ local->lock[0].layout.parent_layout.lk_count = 0;
+ goto err;
+ }
- local = frame->local;
- cached_subvol = local->cached_subvol;
+ return 0;
+err:
+ if (lk_array != NULL) {
+ dht_lock_array_free(lk_array, count);
+ GF_FREE(lk_array);
+ }
- STACK_WIND (frame, dht_create_cbk,
- cached_subvol, cached_subvol->fops->create,
- &local->loc, local->flags, local->mode,
- local->umask, local->fd, local->params);
+ return -1;
+}
+
+int
+dht_set_parent_layout_in_dict(loc_t *loc, xlator_t *this, dht_local_t *local)
+{
+ dht_conf_t *conf = this->private;
+ dht_layout_t *parent_layout = NULL;
+ int *parent_disk_layout = NULL;
+ xlator_t *hashed_subvol = NULL;
+ char pgfid[GF_UUID_BUF_SIZE] = {0};
+ int ret = 0;
+
+ gf_uuid_unparse(loc->parent->gfid, pgfid);
+
+ parent_layout = dht_layout_get(this, loc->parent);
+ hashed_subvol = dht_subvol_get_hashed(this, loc);
+
+ ret = dht_disk_layout_extract_for_subvol(this, parent_layout, hashed_subvol,
+ &parent_disk_layout);
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "%s (%s/%s) (path: %s): "
+ "extracting in-memory layout of parent failed. ",
+ gf_fop_list[local->fop], pgfid, loc->name, loc->path);
+ goto err;
+ }
+
+ ret = dict_set_str_sizen(local->params, GF_PREOP_PARENT_KEY,
+ conf->xattr_name);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "%s (%s/%s) (path: %s): "
+ "setting %s key in params dictionary failed. ",
+ gf_fop_list[local->fop], pgfid, loc->name, loc->path,
+ GF_PREOP_PARENT_KEY);
+ goto err;
+ }
+
+ ret = dict_set_bin(local->params, conf->xattr_name, parent_disk_layout,
+ 4 * 4);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "%s (%s/%s) (path: %s): "
+ "setting parent-layout in params dictionary failed. ",
+ gf_fop_list[local->fop], pgfid, loc->name, loc->path);
+ goto err;
+ }
- return 0;
err:
- DHT_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL,
- NULL, NULL, NULL);
- return 0;
+ dht_layout_unref(this, parent_layout);
+ return ret;
}
int
-dht_create (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, mode_t mode,
- mode_t umask, fd_t *fd, dict_t *params)
+dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *params)
{
- int op_errno = -1;
- xlator_t *subvol = NULL;
- dht_local_t *local = NULL;
- xlator_t *avail_subvol = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
+ int op_errno = -1;
+ xlator_t *subvol = NULL;
+ xlator_t *hashed_subvol = NULL;
+ dht_local_t *local = NULL;
+ int i = 0;
+ dht_conf_t *conf = NULL;
+ int ret = 0;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+
+ conf = this->private;
+
+ dht_get_du_info(frame, this, loc);
+
+ local = dht_local_init(frame, loc, fd, GF_FOP_CREATE);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->params = dict_ref(params);
+ local->flags = flags;
+ local->mode = mode;
+ local->umask = umask;
+
+ if (dht_filter_loc_subvol_key(this, loc, &local->loc, &subvol)) {
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_INFO,
+ "creating %s on %s (got create on %s)", local->loc.path,
+ subvol->name, loc->path);
+
+ /* Since lookup-optimize is enabled by default, we need
+ * to create the linkto file if required.
+ * Note this does not check for decommisioned bricks
+ * and min-free-disk limits as this is a debugging tool
+ * and not expected to be used in production.
+ */
+ hashed_subvol = dht_subvol_get_hashed(this, &local->loc);
- dht_get_du_info (frame, this, loc);
+ if (hashed_subvol && (hashed_subvol != subvol)) {
+ /* Create the linkto file and then the data file */
+ local->cached_subvol = subvol;
+ local->hashed_subvol = hashed_subvol;
- local = dht_local_init (frame, loc, fd, GF_FOP_CREATE);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
+ dht_linkfile_create(frame, dht_create_linkfile_create_cbk, this,
+ subvol, hashed_subvol, &local->loc);
+ goto done;
}
+ /* We either don't have a hashed subvol or the hashed subvol is
+ * the same as the one specified. No need to create the linkto
+ * file as we expect a lookup everywhere if there are problems
+ * with the parent layout
+ */
- if (dht_filter_loc_subvol_key (this, loc, &local->loc,
- &subvol)) {
- gf_log (this->name, GF_LOG_INFO,
- "creating %s on %s (got create on %s)",
- local->loc.path, subvol->name, loc->path);
- STACK_WIND (frame, dht_create_cbk,
- subvol, subvol->fops->create,
- &local->loc, flags, mode, umask, fd, params);
- goto done;
- }
+ dht_set_parent_layout_in_dict(loc, this, local);
+
+ STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol,
+ subvol->fops->create, &local->loc, flags, mode, umask,
+ fd, params);
+ goto done;
+ }
+
+ subvol = dht_subvol_get_hashed(this, loc);
+ if (!subvol) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ "no subvolume in layout for path=%s", loc->path);
+
+ op_errno = EIO;
+ goto err;
+ }
+
+ /* Post remove-brick, the client layout may not be in sync with
+ * disk layout because of lack of lookup. Hence,a create call
+ * may fall on the decommissioned brick. Hence, if the
+ * hashed_subvol is part of decommissioned bricks list, do a
+ * lookup on parent dir. If a fix-layout is already done by the
+ * remove-brick process, the parent directory layout will be in
+ * sync with that of the disk. If fix-layout is still ending
+ * on the parent directory, we can let the file get created on
+ * the decommissioned brick which will be eventually migrated to
+ * non-decommissioned brick based on the new layout.
+ */
+
+ if (conf->decommission_subvols_cnt) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->decommissioned_bricks[i] &&
+ conf->decommissioned_bricks[i] == subvol) {
+ gf_msg_debug(this->name, 0,
+ "hashed subvol:%s is "
+ "part of decommission brick list for "
+ "file: %s",
+ subvol->name, loc->path);
+
+ /* dht_refresh_layout needs directory info in
+ * local->loc. Hence, storing the parent_loc in
+ * local->loc and storing the create context in
+ * local->loc2. We will restore this information
+ * in dht_creation do */
+
+ ret = loc_copy(&local->loc2, &local->loc);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
+ "loc_copy failed %s", loc->path);
- subvol = dht_subvol_get_hashed (this, loc);
- if (!subvol) {
- gf_msg_debug (this->name, 0,
- "no subvolume in layout for path=%s",
- loc->path);
- op_errno = ENOENT;
- goto err;
- }
+ goto err;
+ }
- if (!dht_is_subvol_filled (this, subvol)) {
- gf_msg_trace (this->name, 0,
- "creating %s on %s", loc->path,
- subvol->name);
- STACK_WIND (frame, dht_create_cbk,
- subvol, subvol->fops->create,
- loc, flags, mode, umask, fd, params);
- goto done;
- }
- /* Choose the minimum filled volume, and create the
- files there */
- avail_subvol = dht_free_disk_available_subvol (this, subvol, local);
- if (avail_subvol != subvol) {
- local->params = dict_ref (params);
- local->flags = flags;
- local->mode = mode;
- local->umask = umask;
- local->cached_subvol = avail_subvol;
- local->hashed_subvol = subvol;
- gf_msg_trace (this->name, 0,
- "creating %s on %s (link at %s)", loc->path,
- avail_subvol->name, subvol->name);
- dht_linkfile_create (frame, dht_create_linkfile_create_cbk,
- this, avail_subvol, subvol, loc);
- goto done;
- }
- gf_msg_trace (this->name, 0,
- "creating %s on %s", loc->path, subvol->name);
- STACK_WIND (frame, dht_create_cbk,
- subvol, subvol->fops->create,
- loc, flags, mode, umask, fd, params);
-done:
- return 0;
+ loc_wipe(&local->loc);
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL,
- NULL, NULL, NULL);
+ ret = dht_build_parent_loc(this, &local->loc, loc, &op_errno);
- return 0;
-}
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_LOC_FAILED,
+ "parent loc build failed");
+ goto err;
+ }
+ ret = dht_create_lock(frame, subvol);
-int
-dht_mkdir_selfheal_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- dht_local_t *local = NULL;
- dht_layout_t *layout = NULL;
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INODE_LK_ERROR,
+ "locking parent failed");
+ goto err;
+ }
- local = frame->local;
- layout = local->selfheal.layout;
+ goto done;
+ }
+ }
+ }
- if (op_ret == 0) {
- dht_layout_set (this, local->inode, layout);
- if (local->loc.parent) {
- dht_inode_ctx_time_update (local->loc.parent, this,
- &local->preparent, 0);
+ dht_create_wind_to_avail_subvol(frame, this, subvol, loc, flags, mode,
+ umask, fd, params);
+done:
+ return 0;
- dht_inode_ctx_time_update (local->loc.parent, this,
- &local->postparent, 1);
- }
- }
+err:
- DHT_STACK_UNWIND (mkdir, frame, op_ret, op_errno,
- local->inode, &local->stbuf, &local->preparent,
- &local->postparent, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(create, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL,
+ NULL);
- return 0;
+ return 0;
}
-int
-dht_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+static int
+dht_mkdir_selfheal_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- int ret = -1;
- gf_boolean_t subvol_filled = _gf_false;
- gf_boolean_t dir_exists = _gf_false;
- call_frame_t *prev = NULL;
- dht_layout_t *layout = NULL;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
- local = frame->local;
- prev = cookie;
- layout = local->layout;
+ local = frame->local;
+ layout = local->selfheal.layout;
- subvol_filled = dht_is_subvol_filled (this, prev->this);
+ FRAME_SU_UNDO(frame, dht_local_t);
+ dht_set_fixed_dir_stat(&local->preparent);
+ dht_set_fixed_dir_stat(&local->postparent);
- LOCK (&frame->lock);
- {
- if (subvol_filled && (op_ret != -1)) {
- ret = dht_layout_merge (this, layout, prev->this,
- -1, ENOSPC, NULL);
- } else {
- if (op_ret == -1 && op_errno == EEXIST) {
- /* Very likely just a race between mkdir and
- self-heal (from lookup of a concurrent mkdir
- attempt).
- Ignore error for now. layout setting will
- anyways fail if this was a different (old)
- pre-existing different directory.
- */
- op_ret = 0;
- dir_exists = _gf_true;
- }
- ret = dht_layout_merge (this, layout, prev->this,
- op_ret, op_errno, NULL);
- }
- if (ret)
- gf_msg (this->name, GF_LOG_WARNING, 0,
- DHT_MSG_LAYOUT_MERGE_FAILED,
- "%s: failed to merge layouts for subvol %s",
- local->loc.path, prev->this->name);
-
- if (op_ret == -1) {
- local->op_errno = op_errno;
- goto unlock;
- }
+ if (op_ret == 0) {
+ dht_layout_set(this, local->inode, layout);
- if (dir_exists)
- goto unlock;
+ dht_inode_ctx_time_update(local->inode, this, &local->stbuf, 1);
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update(local->loc.parent, this,
+ &local->preparent, 0);
- dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
- dht_iatt_merge (this, &local->preparent, preparent, prev->this);
- dht_iatt_merge (this, &local->postparent, postparent,
- prev->this);
+ dht_inode_ctx_time_update(local->loc.parent, this,
+ &local->postparent, 1);
}
-unlock:
- UNLOCK (&frame->lock);
+ }
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- dht_selfheal_new_directory (frame, dht_mkdir_selfheal_cbk,
- layout);
- }
+ DHT_STACK_UNWIND(mkdir, frame, op_ret, op_errno, local->inode,
+ &local->stbuf, &local->preparent, &local->postparent,
+ NULL);
- return 0;
+ return 0;
}
-int
-dht_mkdir_hashed_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int op_ret, int op_errno,
- inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
-{
- dht_local_t *local = NULL;
- int ret = -1;
- call_frame_t *prev = NULL;
- dht_layout_t *layout = NULL;
- dht_conf_t *conf = NULL;
- int i = 0;
- xlator_t *hashed_subvol = NULL;
-
- VALIDATE_OR_GOTO (this->private, err);
-
- local = frame->local;
- prev = cookie;
- layout = local->layout;
- conf = this->private;
- hashed_subvol = local->hashed_subvol;
-
- if (gf_uuid_is_null (local->loc.gfid) && !op_ret)
- gf_uuid_copy (local->loc.gfid, stbuf->ia_gfid);
-
- if (dht_is_subvol_filled (this, hashed_subvol))
- ret = dht_layout_merge (this, layout, prev->this,
- -1, ENOSPC, NULL);
- else
- ret = dht_layout_merge (this, layout, prev->this,
- op_ret, op_errno, NULL);
-
- /* TODO: we may have to return from the function
- if layout merge fails. For now, lets just log an error */
+static int
+dht_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ int ret = -1;
+ gf_boolean_t subvol_filled = _gf_false;
+ gf_boolean_t dir_exists = _gf_false;
+ xlator_t *prev = NULL;
+ dht_layout_t *layout = NULL;
+
+ local = frame->local;
+ prev = cookie;
+ layout = local->layout;
+
+ subvol_filled = dht_is_subvol_filled(this, prev);
+
+ LOCK(&frame->lock);
+ {
+ if (subvol_filled && (op_ret != -1)) {
+ ret = dht_layout_merge(this, layout, prev, -1, ENOSPC, NULL);
+ } else {
+ if (op_ret == -1 && op_errno == EEXIST) {
+ /* Very likely just a race between mkdir and
+ self-heal (from lookup of a concurrent mkdir
+ attempt).
+ Ignore error for now. layout setting will
+ anyways fail if this was a different (old)
+ pre-existing different directory.
+ */
+ op_ret = 0;
+ dir_exists = _gf_true;
+ }
+ ret = dht_layout_merge(this, layout, prev, op_ret, op_errno, NULL);
+ }
if (ret)
- gf_msg (this->name, GF_LOG_WARNING, 0,
- DHT_MSG_LAYOUT_MERGE_FAILED,
- "%s: failed to merge layouts for subvol %s",
- local->loc.path, prev->this->name);
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_MERGE_FAILED,
+ "%s: failed to merge layouts for subvol %s", local->loc.path,
+ prev->name);
if (op_ret == -1) {
- local->op_errno = op_errno;
- goto err;
+ local->op_errno = op_errno;
+ goto unlock;
}
- local->op_ret = 0;
- dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
- dht_iatt_merge (this, &local->preparent, preparent, prev->this);
- dht_iatt_merge (this, &local->postparent, postparent, prev->this);
+ if (dir_exists)
+ goto unlock;
- local->call_cnt = conf->subvolume_cnt - 1;
+ dht_iatt_merge(this, &local->stbuf, stbuf);
+ dht_iatt_merge(this, &local->preparent, preparent);
+ dht_iatt_merge(this, &local->postparent, postparent);
+ }
+unlock:
+ UNLOCK(&frame->lock);
- if (gf_uuid_is_null (local->loc.gfid))
- gf_uuid_copy (local->loc.gfid, stbuf->ia_gfid);
- if (local->call_cnt == 0) {
- dht_selfheal_directory (frame, dht_mkdir_selfheal_cbk,
- &local->loc, layout);
- }
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (conf->subvolumes[i] == hashed_subvol)
- continue;
- STACK_WIND (frame, dht_mkdir_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->mkdir, &local->loc,
- local->mode, local->umask, local->params);
- }
- return 0;
-err:
- DHT_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL,
- NULL, NULL);
- return 0;
+ this_call_cnt = dht_frame_return(frame);
+ if (is_last_call(this_call_cnt)) {
+ /*Unlock entrylk and inodelk once mkdir is done on all subvols*/
+ dht_unlock_namespace(frame, &local->lock[0]);
+ FRAME_SU_DO(frame, dht_local_t);
+ dht_selfheal_new_directory(frame, dht_mkdir_selfheal_cbk, layout);
+ }
+
+ return 0;
}
+static int
+dht_mkdir_hashed_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
-int
-dht_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, mode_t umask, dict_t *params)
+static int
+dht_mkdir_helper(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *params)
{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- int op_errno = -1;
- xlator_t *hashed_subvol = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int op_errno = -1, ret = -1;
+ xlator_t *hashed_subvol = NULL;
+ int32_t *parent_disk_layout = NULL;
+ dht_layout_t *parent_layout = NULL;
+ char pgfid[GF_UUID_BUF_SIZE] = {0};
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+ VALIDATE_OR_GOTO(loc->inode, err);
+ VALIDATE_OR_GOTO(loc->path, err);
+ VALIDATE_OR_GOTO(this->private, err);
+
+ gf_uuid_unparse(loc->parent->gfid, pgfid);
+
+ conf = this->private;
+ local = frame->local;
+
+ if (local->op_ret == -1) {
+ gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "mkdir (%s/%s) (path: %s): refreshing parent layout "
+ "failed.",
+ pgfid, loc->name, loc->path);
+ op_errno = local->op_errno;
+ goto err;
+ }
+
+ local->op_ret = -1;
+
+ hashed_subvol = dht_subvol_get_hashed(this, loc);
+ if (hashed_subvol == NULL) {
+ gf_msg_debug(this->name, 0,
+ "mkdir (%s/%s) (path: %s): hashed subvol not "
+ "found",
+ pgfid, loc->name, loc->path);
+ op_errno = ENOENT;
+ goto err;
+ }
+
+ local->hashed_subvol = hashed_subvol;
+
+ parent_layout = dht_layout_get(this, loc->parent);
+
+ ret = dht_disk_layout_extract_for_subvol(this, parent_layout, hashed_subvol,
+ &parent_disk_layout);
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_WARNING, EIO, DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "mkdir (%s/%s) (path: %s): "
+ "extracting in-memory layout of parent failed. ",
+ pgfid, loc->name, loc->path);
+ goto err;
+ }
+
+ if (memcmp(local->parent_disk_layout, parent_disk_layout,
+ sizeof(local->parent_disk_layout)) == 0) {
+ gf_msg(this->name, GF_LOG_WARNING, EIO, DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "mkdir (%s/%s) (path: %s): loop detected. "
+ "parent layout didn't change even though "
+ "previous attempt of mkdir failed because of "
+ "in-memory layout not matching with that on disk.",
+ pgfid, loc->name, loc->path);
+ op_errno = EIO;
+ goto err;
+ }
+
+ memcpy((void *)local->parent_disk_layout, (void *)parent_disk_layout,
+ sizeof(local->parent_disk_layout));
+
+ dht_layout_unref(this, parent_layout);
+ parent_layout = NULL;
+
+ ret = dict_set_str(params, GF_PREOP_PARENT_KEY, conf->xattr_name);
+ if (ret < 0) {
+ local->op_errno = -ret;
+ gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "mkdir (%s/%s) (path: %s): "
+ "setting %s key in params dictionary failed. ",
+ pgfid, loc->name, loc->path, GF_PREOP_PARENT_KEY);
+ goto err;
+ }
+
+ ret = dict_set_bin(params, conf->xattr_name, parent_disk_layout, 4 * 4);
+ if (ret < 0) {
+ local->op_errno = -ret;
+ gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "setting parent-layout in params dictionary failed. "
+ "mkdir (%s/%s) (path: %s)",
+ pgfid, loc->name, loc->path);
+ goto err;
+ }
+
+ parent_disk_layout = NULL;
+
+ STACK_WIND_COOKIE(frame, dht_mkdir_hashed_cbk, hashed_subvol, hashed_subvol,
+ hashed_subvol->fops->mkdir, loc, mode, umask, params);
+
+ return 0;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
- VALIDATE_OR_GOTO (this->private, err);
+err:
+ dht_unlock_namespace(frame, &local->lock[0]);
- conf = this->private;
+ op_errno = local ? local->op_errno : op_errno;
+ DHT_STACK_UNWIND(mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
- dht_get_du_info (frame, this, loc);
+ if (parent_disk_layout != NULL)
+ GF_FREE(parent_disk_layout);
- local = dht_local_init (frame, loc, NULL, GF_FOP_MKDIR);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ if (parent_layout != NULL)
+ dht_layout_unref(this, parent_layout);
- hashed_subvol = dht_subvol_get_hashed (this, loc);
- if (hashed_subvol == NULL) {
- gf_msg_debug (this->name, 0,
- "hashed subvol not found for %s",
- loc->path);
- op_errno = ENOENT;
- goto err;
- }
+ return 0;
+}
- local->hashed_subvol = hashed_subvol;
- local->mode = mode;
- local->umask = umask;
- local->params = dict_ref (params);
- local->inode = inode_ref (loc->inode);
+static int
+dht_mkdir_hashed_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int ret = -1;
+ xlator_t *prev = NULL;
+ dht_layout_t *layout = NULL;
+ dht_conf_t *conf = NULL;
+ int i = 0;
+ xlator_t *hashed_subvol = NULL;
+ char pgfid[GF_UUID_BUF_SIZE] = {0};
+ gf_boolean_t parent_layout_changed = _gf_false;
+ call_stub_t *stub = NULL;
+
+ local = frame->local;
+ prev = cookie;
+ layout = local->layout;
+ conf = this->private;
+ hashed_subvol = local->hashed_subvol;
+
+ gf_uuid_unparse(local->loc.parent->gfid, pgfid);
+
+ if (gf_uuid_is_null(local->loc.gfid) && !op_ret)
+ gf_uuid_copy(local->loc.gfid, stbuf->ia_gfid);
+
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
- local->layout = dht_layout_new (this, conf->subvolume_cnt);
- if (!local->layout) {
- op_errno = ENOMEM;
+ parent_layout_changed = (xdata &&
+ dict_get(xdata, GF_PREOP_CHECK_FAILED))
+ ? 1
+ : 0;
+ if (parent_layout_changed) {
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "mkdir (%s/%s) (path: %s): parent layout "
+ "changed. Attempting a refresh and then a "
+ "retry",
+ pgfid, local->loc.name, local->loc.path);
+
+ stub = fop_mkdir_stub(frame, dht_mkdir_helper, &local->loc,
+ local->mode, local->umask, local->params);
+ if (stub == NULL) {
goto err;
- }
-
- STACK_WIND (frame, dht_mkdir_hashed_cbk,
- hashed_subvol,
- hashed_subvol->fops->mkdir,
- loc, mode, umask, params);
+ }
+ ret = dht_handle_parent_layout_change(this, stub);
+ if (ret) {
+ goto err;
+ }
+
+ stub = NULL;
+
+ return 0;
+ }
+
+ goto err;
+ }
+
+ dict_del(local->params, GF_PREOP_PARENT_KEY);
+ dict_del(local->params, conf->xattr_name);
+
+ if (dht_is_subvol_filled(this, hashed_subvol))
+ ret = dht_layout_merge(this, layout, prev, -1, ENOSPC, NULL);
+ else
+ ret = dht_layout_merge(this, layout, prev, op_ret, op_errno, NULL);
+
+ /* TODO: we may have to return from the function
+ if layout merge fails. For now, lets just log an error */
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_MERGE_FAILED,
+ "%s: failed to merge layouts for subvol %s", local->loc.path,
+ prev->name);
+
+ local->op_ret = 0;
+
+ dht_iatt_merge(this, &local->stbuf, stbuf);
+ dht_iatt_merge(this, &local->preparent, preparent);
+ dht_iatt_merge(this, &local->postparent, postparent);
+
+ local->call_cnt = conf->subvolume_cnt - 1;
+ /* Delete internal mds xattr from params dict to avoid store
+ internal mds xattr on other subvols
+ */
+ dict_del(local->params, conf->mds_xattr_key);
+
+ if (gf_uuid_is_null(local->loc.gfid))
+ gf_uuid_copy(local->loc.gfid, stbuf->ia_gfid);
+
+ /* Set hashed subvol as a mds subvol on inode ctx */
+ /*if (!local->inode)
+ local->inode = inode_ref (inode);
+ */
+ ret = dht_inode_ctx_mdsvol_set(local->inode, this, hashed_subvol);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SET_INODE_CTX_FAILED,
+ "Failed to set hashed subvol for %s on inode vol is %s",
+ local->loc.path, hashed_subvol->name);
+ }
+
+ if (local->call_cnt == 0) {
+ /*Unlock namespace lock once mkdir is done on all subvols*/
+ dht_unlock_namespace(frame, &local->lock[0]);
+ FRAME_SU_DO(frame, dht_local_t);
+ dht_selfheal_directory(frame, dht_mkdir_selfheal_cbk, &local->loc,
+ layout);
return 0;
-
+ }
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvolumes[i] == hashed_subvol)
+ continue;
+ STACK_WIND_COOKIE(frame, dht_mkdir_cbk, conf->subvolumes[i],
+ conf->subvolumes[i], conf->subvolumes[i]->fops->mkdir,
+ &local->loc, local->mode, local->umask,
+ local->params);
+ }
+
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL,
- NULL, NULL);
+ if (local->op_ret != 0) {
+ dht_unlock_namespace(frame, &local->lock[0]);
+ }
- return 0;
+ DHT_STACK_UNWIND(mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
+
+ return 0;
}
+static int
+dht_mkdir_guard_parent_layout_cbk(call_frame_t *frame, xlator_t *this,
+ loc_t *loc, mode_t mode, mode_t umask,
+ dict_t *params)
+{
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = 0;
+ char pgfid[GF_UUID_BUF_SIZE] = {0};
+ int ret = -1;
+ int32_t zero[1] = {0};
+
+ local = frame->local;
+ conf = this->private;
+
+ gf_uuid_unparse(loc->parent->gfid, pgfid);
+
+ if (local->op_ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "mkdir (%s/%s) (path: %s): "
+ "Acquiring lock on parent to guard against "
+ "layout-change failed.",
+ pgfid, loc->name, loc->path);
+ goto err;
+ }
+
+ local->op_ret = -1;
+ /* Add internal MDS xattr on disk for hashed subvol
+ */
+ ret = dht_dict_set_array(params, conf->mds_xattr_key, zero, 1);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value:key = %s for "
+ "path %s",
+ conf->mds_xattr_key, loc->path);
+ }
+
+ STACK_WIND_COOKIE(frame, dht_mkdir_hashed_cbk, local->hashed_subvol,
+ local->hashed_subvol, local->hashed_subvol->fops->mkdir,
+ loc, mode, umask, params);
+
+ return 0;
+err:
+ DHT_STACK_UNWIND(mkdir, frame, -1, local->op_errno, NULL, NULL, NULL, NULL,
+ NULL);
+
+ return 0;
+}
int
-dht_rmdir_selfheal_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xdata)
+dht_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *params)
{
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int op_errno = EINVAL, ret = -1;
+ xlator_t *hashed_subvol = NULL;
+ char pgfid[GF_UUID_BUF_SIZE] = {0};
+ call_stub_t *stub = NULL;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+ VALIDATE_OR_GOTO(loc->inode, err);
+ VALIDATE_OR_GOTO(loc->path, err);
+ VALIDATE_OR_GOTO(this->private, err);
+
+ gf_uuid_unparse(loc->parent->gfid, pgfid);
+
+ conf = this->private;
+
+ if (!params || !dict_get(params, "gfid-req")) {
+ op_errno = EPERM;
+ gf_msg_callingfn(this->name, GF_LOG_WARNING, op_errno,
+ DHT_MSG_GFID_NULL,
+ "mkdir: %s is received "
+ "without gfid-req %p",
+ loc->path, params);
+ goto err;
+ }
+
+ dht_get_du_info(frame, this, loc);
+
+ local = dht_local_init(frame, loc, NULL, GF_FOP_MKDIR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ hashed_subvol = dht_subvol_get_hashed(this, loc);
+ if (hashed_subvol == NULL) {
+ gf_msg_debug(this->name, 0, "hashed subvol not found for %s",
+ loc->path);
+ local->op_errno = EIO;
+ goto err;
+ }
+
+ local->hashed_subvol = hashed_subvol;
+ local->mode = mode;
+ local->umask = umask;
+ if (params)
+ local->params = dict_ref(params);
+
+ local->inode = inode_ref(loc->inode);
+
+ local->layout = dht_layout_new(this, conf->subvolume_cnt);
+ if (!local->layout) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ /* set the newly created directory hash to the commit hash
+ * if the configuration option is set. If configuration option
+ * is not set, the older clients may still be connecting to the
+ * volume and hence we need to preserve the 1 in disk[0] part of the
+ * layout xattr */
+ if (conf->lookup_optimize)
+ local->layout->commit_hash = conf->vol_commit_hash;
+ else
+ local->layout->commit_hash = DHT_LAYOUT_HASH_INVALID;
+
+ stub = fop_mkdir_stub(frame, dht_mkdir_guard_parent_layout_cbk, loc, mode,
+ umask, params);
+ if (stub == NULL) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "mkdir (%s/%s) (path: %s): "
+ "creating stub failed.",
+ pgfid, loc->name, loc->path);
+ local->op_errno = ENOMEM;
+ goto err;
+ }
+
+ ret = dht_guard_parent_layout_and_namespace(this, stub);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "mkdir (%s/%s) (path: %s) cannot wind lock request to "
+ "guard parent layout",
+ pgfid, loc->name, loc->path);
+ goto err;
+ }
+
+ return 0;
- local = frame->local;
-
- DHT_STACK_UNWIND (rmdir, frame, local->op_ret, local->op_errno,
- &local->preparent, &local->postparent, NULL);
+err:
+ op_errno = local ? local->op_errno : op_errno;
+ DHT_STACK_UNWIND(mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
- return 0;
+ return 0;
}
-
-int
-dht_rmdir_hashed_subvol_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+static int
+dht_rmdir_selfheal_cbk(call_frame_t *heal_frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
- char gfid[GF_UUID_BUF_SIZE] ={0};
+ dht_local_t *local = NULL;
+ dht_local_t *heal_local = NULL;
+ call_frame_t *main_frame = NULL;
- local = frame->local;
- prev = cookie;
- conf = this->private;
+ heal_local = heal_frame->local;
+ main_frame = heal_local->main_frame;
+ local = main_frame->local;
- gf_uuid_unparse(local->loc.gfid, gfid);
+ DHT_STACK_DESTROY(heal_frame);
+ dht_set_fixed_dir_stat(&local->preparent);
+ dht_set_fixed_dir_stat(&local->postparent);
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- local->op_ret = -1;
- if (conf->subvolume_cnt != 1) {
- if (op_errno != ENOENT && op_errno != EACCES) {
- local->need_selfheal = 1;
- }
- }
+ DHT_STACK_UNWIND(rmdir, main_frame, local->op_ret, local->op_errno,
+ &local->preparent, &local->postparent, NULL);
- gf_msg_debug (this->name, 0,
- "rmdir on %s for %s failed "
- "(gfid = %s) (%s)",
- prev->this->name, local->loc.path,
- gfid, strerror (op_errno));
- goto unlock;
- }
+ return 0;
+}
- dht_iatt_merge (this, &local->preparent, preparent, prev->this);
- dht_iatt_merge (this, &local->postparent, postparent,
- prev->this);
+static int
+dht_rmdir_hashed_subvol_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ dht_local_t *heal_local = NULL;
+ call_frame_t *heal_frame = NULL;
+ dht_conf_t *conf = NULL;
+ int this_call_cnt = 0;
+ xlator_t *prev = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ local = frame->local;
+ prev = cookie;
+ conf = this->private;
+
+ gf_uuid_unparse(local->loc.gfid, gfid);
+
+ LOCK(&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ local->op_ret = -1;
+ if (conf->subvolume_cnt != 1) {
+ if (op_errno != ENOENT && op_errno != EACCES &&
+ op_errno != ESTALE) {
+ local->need_selfheal = 1;
+ }
+ }
+ gf_msg_debug(this->name, op_errno,
+ "rmdir on %s for %s failed "
+ "(gfid = %s)",
+ prev->name, local->loc.path, gfid);
+ goto unlock;
}
+
+ dht_iatt_merge(this, &local->preparent, preparent);
+ dht_iatt_merge(this, &local->postparent, postparent);
+ }
unlock:
- UNLOCK (&frame->lock);
-
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- if (local->need_selfheal) {
- local->layout =
- dht_layout_get (this, local->loc.inode);
-
- /* TODO: neater interface needed below */
- local->stbuf.ia_type = local->loc.inode->ia_type;
-
- gf_uuid_copy (local->gfid, local->loc.inode->gfid);
- dht_selfheal_restore (frame, dht_rmdir_selfheal_cbk,
- &local->loc, local->layout);
- } else {
-
- if (local->loc.parent) {
- dht_inode_ctx_time_update (local->loc.parent,
- this,
- &local->preparent,
- 0);
-
- dht_inode_ctx_time_update (local->loc.parent,
- this,
- &local->postparent,
- 1);
- }
+ UNLOCK(&frame->lock);
- DHT_STACK_UNWIND (rmdir, frame, local->op_ret,
- local->op_errno, &local->preparent,
- &local->postparent, NULL);
- }
- }
+ this_call_cnt = dht_frame_return(frame);
+ if (is_last_call(this_call_cnt)) {
+ if (local->need_selfheal) {
+ dht_rmdir_unlock(frame, this);
+ local->layout = dht_layout_get(this, local->loc.inode);
- return 0;
-}
+ /* TODO: neater interface needed below */
+ local->stbuf.ia_type = local->loc.inode->ia_type;
+ gf_uuid_copy(local->gfid, local->loc.inode->gfid);
-int
-dht_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
- int done = 0;
- char gfid[GF_UUID_BUF_SIZE] ={0};
+ /* Use a different frame or else the rmdir op_ret is
+ * overwritten by that of the selfheal */
- local = frame->local;
- prev = cookie;
+ heal_frame = copy_frame(frame);
+
+ if (heal_frame == NULL) {
+ goto err;
+ }
+ heal_local = dht_local_init(heal_frame, &local->loc, NULL, 0);
+ if (!heal_local) {
+ DHT_STACK_DESTROY(heal_frame);
+ goto err;
+ }
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- if ((op_errno != ENOENT) && (op_errno != ESTALE)) {
- local->op_errno = op_errno;
- local->op_ret = -1;
+ heal_local->inode = inode_ref(local->loc.inode);
+ heal_local->main_frame = frame;
+ gf_uuid_copy(heal_local->gfid, local->loc.inode->gfid);
- if (op_errno != EACCES)
- local->need_selfheal = 1;
- }
+ dht_selfheal_restore(heal_frame, dht_rmdir_selfheal_cbk,
+ &heal_local->loc, heal_local->layout);
+ return 0;
+ } else {
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update(local->loc.parent, this,
+ &local->preparent, 0);
- gf_uuid_unparse(local->loc.gfid, gfid);
+ dht_inode_ctx_time_update(local->loc.parent, this,
+ &local->postparent, 1);
+ }
- gf_msg_debug (this->name, 0,
- "rmdir on %s for %s failed."
- "(gfid = %s) (%s)",
- prev->this->name, local->loc.path,
- gfid, strerror (op_errno));
- goto unlock;
- }
+ dht_set_fixed_dir_stat(&local->preparent);
+ dht_set_fixed_dir_stat(&local->postparent);
- /* Track if rmdir succeeded on atleast one subvol*/
- local->fop_succeeded = 1;
- dht_iatt_merge (this, &local->preparent, preparent, prev->this);
- dht_iatt_merge (this, &local->postparent, postparent,
- prev->this);
+ dht_rmdir_unlock(frame, this);
+ DHT_STACK_UNWIND(rmdir, frame, local->op_ret, local->op_errno,
+ &local->preparent, &local->postparent, NULL);
}
-unlock:
- UNLOCK (&frame->lock);
+ }
+ return 0;
- this_call_cnt = dht_frame_return (frame);
+err:
+ DHT_STACK_UNWIND(rmdir, frame, local->op_ret, local->op_errno, NULL, NULL,
+ NULL);
+ return 0;
+}
- /* if local->hashed_subvol, we are yet to wind to hashed_subvol. */
- if (local->hashed_subvol && (this_call_cnt == 1)) {
- done = 1;
- } else if (!local->hashed_subvol && !this_call_cnt) {
- done = 1;
- }
+static int
+dht_rmdir_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ DHT_STACK_DESTROY(frame);
+ return 0;
+}
+static int
+dht_rmdir_unlock(call_frame_t *frame, xlator_t *this)
+{
+ dht_local_t *local = NULL, *lock_local = NULL;
+ call_frame_t *lock_frame = NULL;
+ int lock_count = 0;
- if (done) {
- if (local->need_selfheal && local->fop_succeeded) {
- local->layout =
- dht_layout_get (this, local->loc.inode);
+ local = frame->local;
- /* TODO: neater interface needed below */
- local->stbuf.ia_type = local->loc.inode->ia_type;
+ /* Unlock entrylk */
+ dht_unlock_entrylk_wrapper(frame, &local->lock[0].ns.directory_ns);
- gf_uuid_copy (local->gfid, local->loc.inode->gfid);
- dht_selfheal_restore (frame, dht_rmdir_selfheal_cbk,
- &local->loc, local->layout);
- } else if (this_call_cnt) {
- /* If non-hashed subvol's have responded, proceed */
+ /* Unlock inodelk */
+ lock_count = dht_lock_count(local->lock[0].ns.parent_layout.locks,
+ local->lock[0].ns.parent_layout.lk_count);
- local->need_selfheal = 0;
- STACK_WIND (frame, dht_rmdir_hashed_subvol_cbk,
- local->hashed_subvol,
- local->hashed_subvol->fops->rmdir,
- &local->loc, local->flags, NULL);
- } else if (!this_call_cnt) {
- /* All subvol's have responded, proceed */
+ if (lock_count == 0)
+ goto done;
- if (local->loc.parent) {
+ lock_frame = copy_frame(frame);
+ if (lock_frame == NULL)
+ goto done;
- dht_inode_ctx_time_update (local->loc.parent,
- this,
- &local->preparent,
- 0);
+ lock_local = dht_local_init(lock_frame, &local->loc, NULL,
+ lock_frame->root->op);
+ if (lock_local == NULL)
+ goto done;
- dht_inode_ctx_time_update (local->loc.parent,
- this,
- &local->postparent,
- 1);
+ lock_local->lock[0].ns.parent_layout.locks = local->lock[0]
+ .ns.parent_layout.locks;
+ lock_local->lock[0]
+ .ns.parent_layout.lk_count = local->lock[0].ns.parent_layout.lk_count;
- }
+ local->lock[0].ns.parent_layout.locks = NULL;
+ local->lock[0].ns.parent_layout.lk_count = 0;
+ dht_unlock_inodelk(lock_frame, lock_local->lock[0].ns.parent_layout.locks,
+ lock_local->lock[0].ns.parent_layout.lk_count,
+ dht_rmdir_unlock_cbk);
+ lock_frame = NULL;
- DHT_STACK_UNWIND (rmdir, frame, local->op_ret,
- local->op_errno, &local->preparent,
- &local->postparent, NULL);
- }
- }
+done:
+ if (lock_frame != NULL) {
+ DHT_STACK_DESTROY(lock_frame);
+ }
- return 0;
+ return 0;
}
-
-int
-dht_rmdir_do (call_frame_t *frame, xlator_t *this)
+static int
+dht_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- int i = 0;
- xlator_t *hashed_subvol = NULL;
- char gfid[GF_UUID_BUF_SIZE] ={0};
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ xlator_t *prev = NULL;
+ int done = 0;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+ dht_local_t *heal_local = NULL;
+ call_frame_t *heal_frame = NULL;
+ int ret = -1;
+
+ local = frame->local;
+ prev = cookie;
+
+ LOCK(&frame->lock);
+ {
+ if (op_ret == -1) {
+ if ((op_errno != ENOENT) && (op_errno != ESTALE)) {
+ local->op_errno = op_errno;
+ local->op_ret = -1;
- VALIDATE_OR_GOTO (this->private, err);
+ if (op_errno != EACCES)
+ local->need_selfheal = 1;
+ }
- conf = this->private;
- local = frame->local;
+ gf_uuid_unparse(local->loc.gfid, gfid);
- if (local->op_ret == -1)
- goto err;
+ gf_msg_debug(this->name, op_errno,
+ "rmdir on %s for %s failed."
+ "(gfid = %s)",
+ prev->name, local->loc.path, gfid);
+ goto unlock;
+ }
- local->call_cnt = conf->subvolume_cnt;
+ /* Track if rmdir succeeded on at least one subvol*/
+ local->fop_succeeded = 1;
+ dht_iatt_merge(this, &local->preparent, preparent);
+ dht_iatt_merge(this, &local->postparent, postparent);
+ }
+unlock:
+ UNLOCK(&frame->lock);
+ this_call_cnt = dht_frame_return(frame);
- /* first remove from non-hashed_subvol */
- hashed_subvol = dht_subvol_get_hashed (this, &local->loc);
+ /* if local->hashed_subvol, we are yet to wind to hashed_subvol. */
+ if (local->hashed_subvol && (this_call_cnt == 1)) {
+ done = 1;
+ } else if (!local->hashed_subvol && !this_call_cnt) {
+ done = 1;
+ }
- if (!hashed_subvol) {
- gf_uuid_unparse(local->loc.gfid, gfid);
+ if (done) {
+ if (local->need_selfheal && local->fop_succeeded) {
+ dht_rmdir_unlock(frame, this);
+ local->layout = dht_layout_get(this, local->loc.inode);
- gf_msg (this->name, GF_LOG_WARNING, 0,
- DHT_MSG_HASHED_SUBVOL_GET_FAILED,
- "Failed to get hashed subvol for %s (gfid = %s)",
- local->loc.path, gfid);
- } else {
- local->hashed_subvol = hashed_subvol;
- }
+ /* TODO: neater interface needed below */
+ local->stbuf.ia_type = local->loc.inode->ia_type;
+
+ gf_uuid_copy(local->gfid, local->loc.inode->gfid);
+ heal_frame = copy_frame(frame);
+ if (heal_frame == NULL) {
+ goto err;
+ }
+
+ heal_local = dht_local_init(heal_frame, &local->loc, NULL, 0);
+ if (!heal_local) {
+ DHT_STACK_DESTROY(heal_frame);
+ goto err;
+ }
+
+ heal_local->inode = inode_ref(local->loc.inode);
+ heal_local->main_frame = frame;
+ gf_uuid_copy(heal_local->gfid, local->loc.inode->gfid);
+ ret = dht_selfheal_restore(heal_frame, dht_rmdir_selfheal_cbk,
+ &heal_local->loc, heal_local->layout);
+ if (ret) {
+ DHT_STACK_DESTROY(heal_frame);
+ goto err;
+ }
+
+ } else if (this_call_cnt) {
+ /* If non-hashed subvol's have responded, proceed */
+ if (local->op_ret == 0) {
+ /* Delete the dir from the hashed subvol if:
+ * The fop succeeded on at least one subvol
+ * and did not fail on any
+ * or
+ * The fop failed with ENOENT/ESTALE on
+ * all subvols */
+
+ STACK_WIND_COOKIE(frame, dht_rmdir_hashed_subvol_cbk,
+ local->hashed_subvol, local->hashed_subvol,
+ local->hashed_subvol->fops->rmdir,
+ &local->loc, local->flags, NULL);
+ } else {
+ /* hashed-subvol was non-NULL and rmdir failed on
+ * all non hashed-subvols. Unwind rmdir with
+ * local->op_ret and local->op_errno. */
+ dht_rmdir_unlock(frame, this);
+ DHT_STACK_UNWIND(rmdir, frame, local->op_ret, local->op_errno,
+ &local->preparent, &local->postparent, NULL);
- /* When DHT has only 1 child */
- if (conf->subvolume_cnt == 1) {
- STACK_WIND (frame, dht_rmdir_hashed_subvol_cbk,
- conf->subvolumes[0],
- conf->subvolumes[0]->fops->rmdir,
- &local->loc, local->flags, NULL);
return 0;
- }
+ }
+ } else if (!this_call_cnt) {
+ /* All subvol's have responded, proceed */
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (hashed_subvol &&
- (hashed_subvol == conf->subvolumes[i]))
- continue;
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update(local->loc.parent, this,
+ &local->preparent, 0);
- STACK_WIND (frame, dht_rmdir_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->rmdir,
- &local->loc, local->flags, NULL);
+ dht_inode_ctx_time_update(local->loc.parent, this,
+ &local->postparent, 1);
+ }
+
+ dht_set_fixed_dir_stat(&local->preparent);
+ dht_set_fixed_dir_stat(&local->postparent);
+
+ dht_rmdir_unlock(frame, this);
+ DHT_STACK_UNWIND(rmdir, frame, local->op_ret, local->op_errno,
+ &local->preparent, &local->postparent, NULL);
}
+ }
- return 0;
+ return 0;
err:
- DHT_STACK_UNWIND (rmdir, frame, local->op_ret, local->op_errno,
- &local->preparent, &local->postparent, NULL);
- return 0;
+ DHT_STACK_UNWIND(rmdir, frame, -1, local->op_errno, NULL, NULL, NULL);
+ return 0;
}
-
-int
-dht_rmdir_linkfile_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+static int
+dht_rmdir_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- xlator_t *src = NULL;
- call_frame_t *main_frame = NULL;
- dht_local_t *main_local = NULL;
- int this_call_cnt = 0;
- char gfid[GF_UUID_BUF_SIZE] ={0};
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int i = 0;
+ xlator_t *hashed_subvol;
+ conf = this->private;
+ local = frame->local;
- local = frame->local;
- prev = cookie;
- src = prev->this;
+ if (op_ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_INODE_LK_ERROR,
+ "acquiring entrylk after inodelk failed rmdir for %s)",
+ local->loc.path);
- main_frame = local->main_frame;
- main_local = main_frame->local;
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ goto err;
+ }
- gf_uuid_unparse(local->loc.gfid, gfid);
+ hashed_subvol = local->hashed_subvol;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (hashed_subvol && (hashed_subvol == conf->subvolumes[i]))
+ continue;
- if (op_ret == 0) {
- gf_msg_trace (this->name, 0,
- "Unlinked linkfile %s on %s, gfid = %s",
- local->loc.path, src->name, gfid);
- } else {
- main_local->op_ret = -1;
- main_local->op_errno = op_errno;
- gf_msg_debug (this->name, 0,
- "Unlink of %s on %s failed. (gfid = %s) (%s)",
- local->loc.path, src->name, gfid,
- strerror (op_errno));
- }
+ STACK_WIND_COOKIE(frame, dht_rmdir_cbk, conf->subvolumes[i],
+ conf->subvolumes[i], conf->subvolumes[i]->fops->rmdir,
+ &local->loc, local->flags, NULL);
+ }
- this_call_cnt = dht_frame_return (main_frame);
- if (is_last_call (this_call_cnt))
- dht_rmdir_do (main_frame, this);
+ return 0;
- DHT_STACK_DESTROY (frame);
- return 0;
-}
+err:
+ DHT_STACK_UNWIND(rmdir, frame, local->op_ret, local->op_errno,
+ &local->preparent, &local->postparent, NULL);
+ return 0;
+}
-int
-dht_rmdir_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, inode_t *inode,
- struct iatt *stbuf, dict_t *xattr, struct iatt *parent)
+static int
+dht_rmdir_do(call_frame_t *frame, xlator_t *this)
{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- xlator_t *src = NULL;
- call_frame_t *main_frame = NULL;
- dht_local_t *main_local = NULL;
- int this_call_cnt = 0;
- dht_conf_t *conf = this->private;
- char gfid[GF_UUID_BUF_SIZE] = {0};
-
- local = frame->local;
- prev = cookie;
- src = prev->this;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
+ xlator_t *hashed_subvol = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
- main_frame = local->main_frame;
- main_local = main_frame->local;
+ VALIDATE_OR_GOTO(frame->local, err);
+ local = frame->local;
+ VALIDATE_OR_GOTO(this->private, out);
+ conf = this->private;
- if (op_ret != 0)
- goto err;
+ if (local->op_ret == -1)
+ goto out;
- if (!check_is_linkfile (inode, stbuf, xattr, conf->link_xattr_name)) {
- main_local->op_ret = -1;
- main_local->op_errno = ENOTEMPTY;
+ local->call_cnt = conf->subvolume_cnt;
- gf_uuid_unparse(local->loc.gfid, gfid);
+ /* first remove from non-hashed_subvol */
+ hashed_subvol = dht_subvol_get_hashed(this, &local->loc);
- gf_log (this->name, GF_LOG_WARNING,
- "%s on %s is not a linkfile (type=0%o, gfid = %s)",
- local->loc.path, src->name, stbuf->ia_type, gfid);
- goto err;
- }
+ if (!hashed_subvol) {
+ gf_uuid_unparse(local->loc.gfid, gfid);
- STACK_WIND (frame, dht_rmdir_linkfile_unlink_cbk,
- src, src->fops->unlink, &local->loc, 0, NULL);
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ "Failed to get hashed subvol for %s (gfid = %s)",
+ local->loc.path, gfid);
+ } else {
+ local->hashed_subvol = hashed_subvol;
+ }
+
+ /* When DHT has only 1 child */
+ if (conf->subvolume_cnt == 1) {
+ STACK_WIND_COOKIE(frame, dht_rmdir_hashed_subvol_cbk,
+ conf->subvolumes[0], conf->subvolumes[0],
+ conf->subvolumes[0]->fops->rmdir, &local->loc,
+ local->flags, NULL);
return 0;
+ }
+
+ local->current = &local->lock[0];
+ ret = dht_protect_namespace(frame, &local->loc, local->hashed_subvol,
+ &local->current->ns, dht_rmdir_lock_cbk);
+ if (ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = errno ? errno : EINVAL;
+ goto out;
+ }
+
+ return 0;
+
+out:
+ dht_set_fixed_dir_stat(&local->preparent);
+ dht_set_fixed_dir_stat(&local->postparent);
+
+ DHT_STACK_UNWIND(rmdir, frame, local->op_ret, local->op_errno,
+ &local->preparent, &local->postparent, NULL);
+ return 0;
err:
+ DHT_STACK_UNWIND(rmdir, frame, -1, EINVAL, NULL, NULL, NULL);
+ return 0;
+}
+
+static void
+dht_rmdir_readdirp_done(call_frame_t *readdirp_frame, xlator_t *this)
+{
+ call_frame_t *main_frame = NULL;
+ dht_local_t *main_local = NULL;
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+
+ local = readdirp_frame->local;
+ main_frame = local->main_frame;
+ main_local = main_frame->local;
+
+ /* At least one readdirp failed.
+ * This is a bit hit or miss - if readdirp failed on more than
+ * one subvol, we don't know which error is returned.
+ */
+ if (local->op_ret == -1) {
+ main_local->op_ret = local->op_ret;
+ main_local->op_errno = local->op_errno;
+ }
+
+ this_call_cnt = dht_frame_return(main_frame);
+
+ if (is_last_call(this_call_cnt))
+ dht_rmdir_do(main_frame, this);
+
+ DHT_STACK_DESTROY(readdirp_frame);
+}
+
+/* Keep sending readdirp on the subvol until it returns no more entries
+ * It is possible that not all entries will fit in a single readdirp in
+ * which case the rmdir will keep failing with ENOTEMPTY
+ */
- this_call_cnt = dht_frame_return (main_frame);
- if (is_last_call (this_call_cnt))
- dht_rmdir_do (main_frame, this);
+static int
+dht_rmdir_readdirp_do(call_frame_t *readdirp_frame, xlator_t *this)
+{
+ dht_local_t *local = NULL;
- DHT_STACK_DESTROY (frame);
+ local = readdirp_frame->local;
+
+ if (local->op_ret == -1) {
+ /* there is no point doing another readdirp on this
+ * subvol . */
+ dht_rmdir_readdirp_done(readdirp_frame, this);
return 0;
-}
+ }
+ STACK_WIND_COOKIE(readdirp_frame, dht_rmdir_readdirp_cbk,
+ local->hashed_subvol, local->hashed_subvol,
+ local->hashed_subvol->fops->readdirp, local->fd, 4096, 0,
+ local->xattr);
-int
-dht_rmdir_cached_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, inode_t *inode,
- struct iatt *stbuf, dict_t *xattr,
- struct iatt *parent)
-{
- dht_local_t *local = NULL;
- xlator_t *src = NULL;
- call_frame_t *main_frame = NULL;
- dht_local_t *main_local = NULL;
- int this_call_cnt = 0;
- dht_conf_t *conf = this->private;
- dict_t *xattrs = NULL;
- int ret = 0;
+ return 0;
+}
- local = frame->local;
- src = local->hashed_subvol;
+static int
+dht_rmdir_linkfile_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
+ xlator_t *src = NULL;
+ call_frame_t *readdirp_frame = NULL;
+ dht_local_t *readdirp_local = NULL;
+ int this_call_cnt = 0;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ local = frame->local;
+ prev = cookie;
+ src = prev;
+
+ readdirp_frame = local->main_frame;
+ readdirp_local = readdirp_frame->local;
+
+ gf_uuid_unparse(local->loc.gfid, gfid);
+
+ if (op_ret == 0) {
+ gf_msg_trace(this->name, 0, "Unlinked linkfile %s on %s, gfid = %s",
+ local->loc.path, src->name, gfid);
+ } else {
+ if (op_errno != ENOENT) {
+ readdirp_local->op_ret = -1;
+ readdirp_local->op_errno = op_errno;
+ }
+ gf_msg_debug(this->name, op_errno,
+ "Unlink of %s on %s failed. (gfid = %s)", local->loc.path,
+ src->name, gfid);
+ }
+
+ this_call_cnt = dht_frame_return(readdirp_frame);
+
+ if (is_last_call(this_call_cnt))
+ dht_rmdir_readdirp_do(readdirp_frame, this);
+
+ DHT_STACK_DESTROY(frame);
+ return 0;
+}
- main_frame = local->main_frame;
- main_local = main_frame->local;
+static int
+dht_rmdir_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, dict_t *xattr, struct iatt *parent)
+{
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
+ xlator_t *src = NULL;
+ call_frame_t *readdirp_frame = NULL;
+ dht_local_t *readdirp_local = NULL;
+ int this_call_cnt = 0;
+ dht_conf_t *conf = this->private;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ local = frame->local;
+ prev = cookie;
+ src = prev;
+
+ gf_msg_debug(this->name, 0, "dht_rmdir_lookup_cbk %s", local->loc.path);
+
+ readdirp_frame = local->main_frame;
+ readdirp_local = readdirp_frame->local;
+
+ if (op_ret != 0) {
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_FILE_LOOKUP_FAILED,
+ "lookup failed for %s on %s", local->loc.path, src->name);
+ goto err;
+ }
+
+ if (!check_is_linkfile(inode, stbuf, xattr, conf->link_xattr_name)) {
+ readdirp_local->op_ret = -1;
+ readdirp_local->op_errno = ENOTEMPTY;
- if (op_ret == 0) {
- main_local->op_ret = -1;
- main_local->op_errno = ENOTEMPTY;
+ gf_uuid_unparse(local->loc.gfid, gfid);
- gf_log (this->name, GF_LOG_WARNING,
- "%s found on cached subvol %s",
- local->loc.path, src->name);
- goto err;
- } else if (op_errno != ENOENT) {
- main_local->op_ret = -1;
- main_local->op_errno = op_errno;
- goto err;
- }
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_NOT_LINK_FILE_ERROR,
+ "%s on %s is not a linkfile (type=0%o, gfid = %s)",
+ local->loc.path, src->name, stbuf->ia_type, gfid);
+ goto err;
+ }
- xattrs = dict_new ();
- if (!xattrs) {
- gf_log (this->name, GF_LOG_ERROR, "dict_new failed");
- goto err;
- }
+ STACK_WIND_COOKIE(frame, dht_rmdir_linkfile_unlink_cbk, src, src,
+ src->fops->unlink, &local->loc, 0, NULL);
+ return 0;
+err:
- ret = dict_set_uint32 (xattrs, conf->link_xattr_name, 256);
- if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value: key = %s",
- conf->link_xattr_name);
- if (xattrs)
- dict_unref (xattrs);
- goto err;
- }
+ this_call_cnt = dht_frame_return(readdirp_frame);
+ if (is_last_call(this_call_cnt)) {
+ dht_rmdir_readdirp_do(readdirp_frame, this);
+ }
- STACK_WIND (frame, dht_rmdir_lookup_cbk,
- src, src->fops->lookup, &local->loc, xattrs);
- if (xattrs)
- dict_unref (xattrs);
+ DHT_STACK_DESTROY(frame);
+ return 0;
+}
- return 0;
+static int
+dht_rmdir_cached_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, dict_t *xattr,
+ struct iatt *parent)
+{
+ dht_local_t *local = NULL;
+ xlator_t *src = NULL;
+ call_frame_t *readdirp_frame = NULL;
+ dht_local_t *readdirp_local = NULL;
+ int this_call_cnt = 0;
+ dht_conf_t *conf = this->private;
+ dict_t *xattrs = NULL;
+ int ret = 0;
+
+ local = frame->local;
+ src = local->hashed_subvol;
+
+ /* main_frame here is the readdirp_frame */
+
+ readdirp_frame = local->main_frame;
+ readdirp_local = readdirp_frame->local;
+
+ gf_msg_debug(this->name, 0, "returning for %s ", local->loc.path);
+
+ if (op_ret == 0) {
+ readdirp_local->op_ret = -1;
+ readdirp_local->op_errno = ENOTEMPTY;
+
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_SUBVOL_ERROR,
+ "%s found on cached subvol %s", local->loc.path, src->name);
+ goto err;
+ } else if (op_errno != ENOENT) {
+ readdirp_local->op_ret = -1;
+ readdirp_local->op_errno = op_errno;
+
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_SUBVOL_ERROR,
+ "%s not found on cached subvol %s", local->loc.path, src->name);
+ goto err;
+ }
+
+ xattrs = dict_new();
+ if (!xattrs) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
+ "dict_new failed");
+ goto err;
+ }
+
+ ret = dict_set_uint32(xattrs, conf->link_xattr_name, 256);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value: key = %s",
+ conf->link_xattr_name);
+ if (xattrs)
+ dict_unref(xattrs);
+ goto err;
+ }
+ STACK_WIND_COOKIE(frame, dht_rmdir_lookup_cbk, src, src, src->fops->lookup,
+ &local->loc, xattrs);
+ if (xattrs)
+ dict_unref(xattrs);
+
+ return 0;
err:
- this_call_cnt = dht_frame_return (main_frame);
- if (is_last_call (this_call_cnt))
- dht_rmdir_do (main_frame, this);
+ this_call_cnt = dht_frame_return(readdirp_frame);
- DHT_STACK_DESTROY (frame);
- return 0;
+ /* Once all the lookups/unlinks etc have returned, proceed to wind
+ * readdirp on the subvol again until no entries are returned.
+ * This is required if there are more entries than can be returned
+ * in a single readdirp call.
+ */
+
+ if (is_last_call(this_call_cnt))
+ dht_rmdir_readdirp_do(readdirp_frame, this);
+
+ DHT_STACK_DESTROY(frame);
+ return 0;
}
+static int
+dht_rmdir_is_subvol_empty(call_frame_t *frame, xlator_t *this,
+ gf_dirent_t *entries, xlator_t *src)
+{
+ int ret = 0;
+ int build_ret = 0;
+ gf_dirent_t *trav = NULL;
+ call_frame_t *lookup_frame = NULL;
+ dht_local_t *lookup_local = NULL;
+ dht_local_t *local = NULL;
+ dict_t *xattrs = NULL;
+ dht_conf_t *conf = this->private;
+ xlator_t *subvol = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+ int count = 0;
+ gf_boolean_t unwind = _gf_false;
+
+ local = frame->local;
+
+ list_for_each_entry(trav, &entries->list, list)
+ {
+ if (strcmp(trav->d_name, ".") == 0)
+ continue;
+ if (strcmp(trav->d_name, "..") == 0)
+ continue;
+ if (check_is_linkfile(NULL, (&trav->d_stat), trav->dict,
+ conf->link_xattr_name)) {
+ count++;
+ continue;
+ }
+
+ /* this entry is either a directory which is neither "." nor "..",
+ or a non directory which is not a linkfile. the directory is to
+ be treated as non-empty
+ */
+ return 0;
+ }
-int
-dht_rmdir_is_subvol_empty (call_frame_t *frame, xlator_t *this,
- gf_dirent_t *entries, xlator_t *src)
-{
- int ret = 0;
- int build_ret = 0;
- gf_dirent_t *trav = NULL;
- call_frame_t *lookup_frame = NULL;
- dht_local_t *lookup_local = NULL;
- dht_local_t *local = NULL;
- dict_t *xattrs = NULL;
- dht_conf_t *conf = this->private;
- xlator_t *subvol = NULL;
- char gfid[GF_UUID_BUF_SIZE] = {0};
+ xattrs = dict_new();
+ if (!xattrs) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
+ "dict_new failed");
+ return -1;
+ }
- local = frame->local;
+ ret = dict_set_uint32(xattrs, conf->link_xattr_name, 256);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value: key = %s",
+ conf->link_xattr_name);
- list_for_each_entry (trav, &entries->list, list) {
- if (strcmp (trav->d_name, ".") == 0)
- continue;
- if (strcmp (trav->d_name, "..") == 0)
- continue;
- if (check_is_linkfile (NULL, (&trav->d_stat), trav->dict,
- conf->link_xattr_name)) {
- ret++;
- continue;
- }
+ if (xattrs)
+ dict_unref(xattrs);
+ return -1;
+ }
- /* this entry is either a directory which is neither "." nor "..",
- or a non directory which is not a linkfile. the directory is to
- be treated as non-empty
- */
- return 0;
+ local->call_cnt = count;
+ ret = 0;
+
+ list_for_each_entry(trav, &entries->list, list)
+ {
+ if (strcmp(trav->d_name, ".") == 0)
+ continue;
+ if (strcmp(trav->d_name, "..") == 0)
+ continue;
+
+ lookup_frame = copy_frame(frame);
+
+ if (!lookup_frame) {
+ /* out of memory, let the rmdir fail
+ (as non-empty, unfortunately) */
+ goto err;
}
- xattrs = dict_new ();
- if (!xattrs) {
- gf_log (this->name, GF_LOG_ERROR, "dict_new failed");
- return -1;
+ lookup_local = dht_local_init(lookup_frame, NULL, NULL, GF_FOP_LOOKUP);
+ if (!lookup_local) {
+ goto err;
}
- ret = dict_set_uint32 (xattrs, conf->link_xattr_name, 256);
- if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value: key = %s",
- conf->link_xattr_name);
-
- if (xattrs)
- dict_unref (xattrs);
- return -1;
- }
-
- list_for_each_entry (trav, &entries->list, list) {
- if (strcmp (trav->d_name, ".") == 0)
- continue;
- if (strcmp (trav->d_name, "..") == 0)
- continue;
-
- lookup_frame = NULL;
- lookup_local = NULL;
-
- lookup_frame = copy_frame (frame);
- if (!lookup_frame) {
- /* out of memory, let the rmdir fail
- (as non-empty, unfortunately) */
- goto err;
- }
+ lookup_frame->local = lookup_local;
+ lookup_local->main_frame = frame;
+ lookup_local->hashed_subvol = src;
- lookup_local = mem_get0 (this->local_pool);
- if (!lookup_local) {
- goto err;
- }
+ build_ret = dht_build_child_loc(this, &lookup_local->loc, &local->loc,
+ trav->d_name);
+ if (build_ret != 0)
+ goto err;
- lookup_frame->local = lookup_local;
- lookup_local->main_frame = frame;
- lookup_local->hashed_subvol = src;
+ gf_uuid_copy(lookup_local->loc.gfid, trav->d_stat.ia_gfid);
- build_ret = dht_build_child_loc (this, &lookup_local->loc,
- &local->loc, trav->d_name);
- if (build_ret != 0)
- goto err;
+ gf_uuid_unparse(lookup_local->loc.gfid, gfid);
- gf_uuid_copy (lookup_local->loc.gfid, trav->d_stat.ia_gfid);
+ gf_msg_trace(this->name, 0, "looking up %s on subvolume %s, gfid = %s",
+ lookup_local->loc.path, src->name, gfid);
- gf_uuid_unparse(lookup_local->loc.gfid, gfid);
+ subvol = dht_linkfile_subvol(this, NULL, &trav->d_stat, trav->dict);
+ if (!subvol || (subvol == src)) {
+ /* we need to delete the linkto file if it does not have a
+ * valid subvol or it points to itself.
+ */
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_INVALID_LINKFILE,
+ "Linkfile does not have link subvolume. "
+ "path = %s, gfid = %s",
+ lookup_local->loc.path, gfid);
- gf_msg_trace (this->name, 0,
- "looking up %s on subvolume %s, gfid = %s",
- lookup_local->loc.path, src->name, gfid);
+ gf_msg_debug(this->name, 0, "looking up %s on subvol %s, gfid = %s",
+ lookup_local->loc.path, src->name, gfid);
- LOCK (&frame->lock);
- {
- local->call_cnt++;
- }
- UNLOCK (&frame->lock);
-
- subvol = dht_linkfile_subvol (this, NULL, &trav->d_stat,
- trav->dict);
- if (!subvol) {
- gf_log (this->name, GF_LOG_INFO,
- "Linkfile does not have link subvolume. "
- "path = %s, gfid = %s",
- lookup_local->loc.path, gfid);
- STACK_WIND (lookup_frame, dht_rmdir_lookup_cbk,
- src, src->fops->lookup,
- &lookup_local->loc, xattrs);
- } else {
- STACK_WIND (lookup_frame, dht_rmdir_cached_lookup_cbk,
- subvol, subvol->fops->lookup,
- &lookup_local->loc, xattrs);
- }
- ret++;
+ STACK_WIND_COOKIE(lookup_frame, dht_rmdir_lookup_cbk, src, src,
+ src->fops->lookup, &lookup_local->loc, xattrs);
+ } else {
+ gf_msg_debug(this->name, 0,
+ "Looking up linkfile target %s on "
+ " subvol %s, gfid = %s",
+ lookup_local->loc.path, subvol->name, gfid);
+
+ STACK_WIND(lookup_frame, dht_rmdir_cached_lookup_cbk, subvol,
+ subvol->fops->lookup, &lookup_local->loc, xattrs);
}
+ ret++;
- if (xattrs)
- dict_unref (xattrs);
+ lookup_frame = NULL;
+ lookup_local = NULL;
+ }
- return ret;
+ if (xattrs)
+ dict_unref(xattrs);
+
+ return ret;
err:
- if (xattrs)
- dict_unref (xattrs);
+ if (xattrs)
+ dict_unref(xattrs);
- DHT_STACK_DESTROY (lookup_frame);
- return 0;
+ if (lookup_frame)
+ DHT_STACK_DESTROY(lookup_frame);
+
+ /* Handle the case where the wound calls have unwound before the
+ * loop processing is done
+ */
+
+ LOCK(&frame->lock);
+ {
+ local->op_ret = -1;
+ local->op_errno = ENOTEMPTY;
+
+ local->call_cnt -= (count - ret);
+ if (!local->call_cnt)
+ unwind = _gf_true;
+ }
+ UNLOCK(&frame->lock);
+
+ if (!unwind) {
+ return ret;
+ }
+ return 0;
}
+/*
+ * No more entries on this subvol. Proceed to the actual rmdir operation.
+ */
-int
-dht_rmdir_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, gf_dirent_t *entries,
- dict_t *xdata)
+static int
+dht_rmdir_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- int this_call_cnt = -1;
- call_frame_t *prev = NULL;
- xlator_t *src = NULL;
- int ret = 0;
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
+ xlator_t *src = NULL;
+ int ret = 0;
+ char *path = NULL;
+
+ local = frame->local;
+ prev = cookie;
+ src = prev;
+
+ if (op_ret > 2) {
+ /* dht_rmdir_is_subvol_empty() may free the frame,
+ * copy path for logging.
+ */
+ path = gf_strdup(local->loc.path);
- local = frame->local;
- prev = cookie;
- src = prev->this;
-
- if (op_ret > 2) {
- ret = dht_rmdir_is_subvol_empty (frame, this, entries, src);
-
- switch (ret) {
- case 0: /* non linkfiles exist */
- gf_msg_trace (this->name, 0,
- "readdir on %s for %s returned %d "
- "entries", prev->this->name,
- local->loc.path, op_ret);
- local->op_ret = -1;
- local->op_errno = ENOTEMPTY;
- break;
- default:
- /* @ret number of linkfiles are getting unlinked */
- gf_msg_trace (this->name, 0,
- "readdir on %s for %s found %d "
- "linkfiles", prev->this->name,
- local->loc.path, ret);
- break;
- }
+ ret = dht_rmdir_is_subvol_empty(frame, this, entries, src);
+
+ switch (ret) {
+ case 0: /* non linkfiles exist */
+ gf_msg_trace(this->name, 0,
+ "readdir on %s for %s returned %d "
+ "entries",
+ prev->name, local->loc.path, op_ret);
+ local->op_ret = -1;
+ local->op_errno = ENOTEMPTY;
+ break;
+ default:
+ /* @ret number of linkfiles are getting unlinked */
+ gf_msg_trace(this->name, 0,
+ "readdir on %s for %s found %d "
+ "linkfiles",
+ prev->name, path, ret);
+ break;
}
+ }
- this_call_cnt = dht_frame_return (frame);
+ /* readdirp failed or no linkto files were found on this subvol */
+ if (!ret)
+ dht_rmdir_readdirp_done(frame, this);
+
+ GF_FREE(path);
+ return 0;
+}
+
+static int
+dht_rmdir_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, fd_t *fd, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = -1;
+ xlator_t *prev = NULL;
+ int ret = 0;
+ dht_conf_t *conf = this->private;
+ dict_t *dict = NULL;
+ int i = 0;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+ dht_local_t *readdirp_local = NULL;
+ call_frame_t *readdirp_frame = NULL;
+ int cnt = 0;
+
+ local = frame->local;
+ prev = cookie;
+
+ this_call_cnt = dht_frame_return(frame);
+ if (op_ret == -1) {
+ gf_uuid_unparse(local->loc.gfid, gfid);
- if (is_last_call (this_call_cnt)) {
- dht_rmdir_do (frame, this);
+ gf_msg_debug(this->name, op_errno,
+ "opendir on %s for %s failed, "
+ "gfid = %s,",
+ prev->name, local->loc.path, gfid);
+ if ((op_errno != ENOENT) && (op_errno != ESTALE)) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
}
+ goto err;
+ }
+ if (!is_last_call(this_call_cnt))
return 0;
-}
+ if (local->op_ret == -1)
+ goto err;
-int
-dht_rmdir_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, fd_t *fd, dict_t *xdata)
-{
- dht_local_t *local = NULL;
- int this_call_cnt = -1;
- call_frame_t *prev = NULL;
- dict_t *dict = NULL;
- int ret = 0;
- dht_conf_t *conf = this->private;
- int i = 0;
- char gfid[GF_UUID_BUF_SIZE] = {0};
+ fd_bind(fd);
- local = frame->local;
- prev = cookie;
+ dict = dict_new();
+ if (!dict) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto err;
+ }
+ ret = dict_set_uint32(dict, conf->link_xattr_name, 256);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "%s: Failed to set dictionary value:key = %s", local->loc.path,
+ conf->link_xattr_name);
- this_call_cnt = dht_frame_return (frame);
- if (op_ret == -1) {
- gf_uuid_unparse(local->loc.gfid, gfid);
-
- gf_msg_debug (this->name, 0,
- "opendir on %s for %s failed, "
- "gfid = %s, (%s)",
- prev->this->name, local->loc.path, gfid,
- strerror (op_errno));
- if ((op_errno != ENOENT) && (op_errno != ESTALE)) {
- local->op_ret = -1;
- local->op_errno = op_errno;
- }
- goto err;
- }
+ cnt = local->call_cnt = conf->subvolume_cnt;
- if (!is_last_call (this_call_cnt))
- return 0;
+ /* Create a separate frame per subvol as we might need
+ * to resend readdirp multiple times to get all the
+ * entries.
+ */
- if (local->op_ret == -1)
- goto err;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ readdirp_frame = copy_frame(frame);
- dict = dict_new ();
- if (!dict) {
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- goto err;
+ if (!readdirp_frame) {
+ cnt--;
+ /* Reduce the local->call_cnt as well */
+ (void)dht_frame_return(frame);
+ continue;
}
- ret = dict_set_uint32 (dict, conf->link_xattr_name, 256);
- if (ret)
- gf_msg (this->name, GF_LOG_WARNING, 0,
- DHT_MSG_DICT_SET_FAILED,
- "%s: Failed to set dictionary value:key = %s",
- local->loc.path, conf->link_xattr_name);
+ readdirp_local = dht_local_init(readdirp_frame, &local->loc, local->fd,
+ 0);
- local->call_cnt = conf->subvolume_cnt;
- for (i = 0; i < conf->subvolume_cnt; i++) {
- STACK_WIND (frame, dht_rmdir_readdirp_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->readdirp,
- local->fd, 4096, 0, dict);
+ if (!readdirp_local) {
+ DHT_STACK_DESTROY(readdirp_frame);
+ cnt--;
+ /* Reduce the local->call_cnt as well */
+ dht_frame_return(frame);
+ continue;
}
+ readdirp_local->main_frame = frame;
+ readdirp_local->op_ret = 0;
+ readdirp_local->xattr = dict_ref(dict);
+ /* overload this field to save the subvol info */
+ readdirp_local->hashed_subvol = conf->subvolumes[i];
- if (dict)
- dict_unref (dict);
+ STACK_WIND_COOKIE(readdirp_frame, dht_rmdir_readdirp_cbk,
+ conf->subvolumes[i], conf->subvolumes[i],
+ conf->subvolumes[i]->fops->readdirp,
+ readdirp_local->fd, 4096, 0, readdirp_local->xattr);
+ }
- return 0;
+ if (dict)
+ dict_unref(dict);
+
+ /* Could not wind readdirp to any subvol */
+
+ if (!cnt)
+ goto err;
+
+ return 0;
err:
- if (is_last_call (this_call_cnt)) {
- dht_rmdir_do (frame, this);
- }
+ if (is_last_call(this_call_cnt)) {
+ dht_rmdir_do(frame, this);
+ }
- return 0;
+ return 0;
}
-
int
-dht_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
- dict_t *xdata)
+dht_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- int op_errno = -1;
- int i = -1;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int op_errno = -1;
+ int i = -1;
+ int ret = -1;
+ dict_t *xattr_req = NULL;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+ VALIDATE_OR_GOTO(loc->inode, err);
+ VALIDATE_OR_GOTO(loc->path, err);
+ VALIDATE_OR_GOTO(this->private, err);
+
+ conf = this->private;
+
+ local = dht_local_init(frame, loc, NULL, GF_FOP_RMDIR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->call_cnt = conf->subvolume_cnt;
+ local->op_ret = 0;
+ local->fop_succeeded = 0;
+
+ local->flags = flags;
+
+ local->fd = fd_create(local->loc.inode, frame->root->pid);
+ if (!local->fd) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ if (flags) {
+ return dht_rmdir_do(frame, this);
+ }
+ if (xdata) {
+ xattr_req = dict_ref(xdata);
+ } else {
+ xattr_req = dict_new();
+ }
+ if (xattr_req) {
+ ret = dict_set_uint32(xattr_req, conf->link_xattr_name, 256);
+ /* If parallel-readdir is enabled, this is required
+ * to handle stale linkto files in the directory
+ * being deleted. If this fails, log an error but
+ * do not prevent the operation.
+ */
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "%s: failed to set key %s",
+ loc->path, conf->link_xattr_name);
+ }
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "%s: failed to set key %s",
+ loc->path, conf->link_xattr_name);
+ }
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ STACK_WIND_COOKIE(frame, dht_rmdir_opendir_cbk, conf->subvolumes[i],
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->opendir, loc, local->fd,
+ xattr_req);
+ }
+
+ if (xattr_req) {
+ dict_unref(xattr_req);
+ }
+ return 0;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
- VALIDATE_OR_GOTO (this->private, err);
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(rmdir, frame, -1, op_errno, NULL, NULL, NULL);
- conf = this->private;
+ return 0;
+}
- local = dht_local_init (frame, loc, NULL, GF_FOP_RMDIR);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+static int
+dht_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
- local->call_cnt = conf->subvolume_cnt;
- local->op_ret = 0;
- local->fop_succeeded = 0;
+{
+ DHT_STACK_UNWIND(entrylk, frame, op_ret, op_errno, xdata);
+ return 0;
+}
- local->flags = flags;
+/* TODO
+ * Sending entrylk to cached subvol can result in stale lock
+ * as described in the bug 1311002.
+ */
+int
+dht_entrylk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc,
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+ VALIDATE_OR_GOTO(loc->inode, err);
+
+ local = dht_local_init(frame, loc, NULL, GF_FOP_ENTRYLK);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_uuid_unparse(loc->gfid, gfid);
- local->fd = fd_create (local->loc.inode, frame->root->pid);
- if (!local->fd) {
+ gf_msg_debug(this->name, 0,
+ "no cached subvolume for path=%s, "
+ "gfid = %s",
+ loc->path, gfid);
+ op_errno = EINVAL;
+ goto err;
+ }
- op_errno = ENOMEM;
- goto err;
- }
+ local->call_cnt = 1;
- for (i = 0; i < conf->subvolume_cnt; i++) {
- STACK_WIND (frame, dht_rmdir_opendir_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->opendir,
- loc, local->fd, NULL);
- }
+ STACK_WIND(frame, dht_entrylk_cbk, subvol, subvol->fops->entrylk, volume,
+ loc, basename, cmd, type, xdata);
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (rmdir, frame, -1, op_errno,
- NULL, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(entrylk, frame, -1, op_errno, NULL);
- return 0;
+ return 0;
}
-int
-dht_entrylk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
+static int
+dht_fentrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- DHT_STACK_UNWIND (entrylk, frame, op_ret, op_errno, xdata);
- return 0;
+ DHT_STACK_UNWIND(fentrylk, frame, op_ret, op_errno, NULL);
+ return 0;
}
-
int
-dht_entrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
-{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
- char gfid[GF_UUID_BUF_SIZE] = {0};
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
-
- local = dht_local_init (frame, loc, NULL, GF_FOP_ENTRYLK);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
+dht_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
- subvol = local->cached_subvol;
- if (!subvol) {
- gf_uuid_unparse(loc->gfid, gfid);
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
+ VALIDATE_OR_GOTO(fd->inode, err);
- gf_msg_debug (this->name, 0,
- "no cached subvolume for path=%s, "
- "gfid = %s", loc->path, gfid);
- op_errno = EINVAL;
- goto err;
- }
+ gf_uuid_unparse(fd->inode->gfid, gfid);
- local->call_cnt = 1;
+ subvol = dht_subvol_get_cached(this, fd->inode);
+ if (!subvol) {
+ gf_msg_debug(this->name, 0,
+ "No cached subvolume for fd=%p,"
+ " gfid = %s",
+ fd, gfid);
+ op_errno = EINVAL;
+ goto err;
+ }
- STACK_WIND (frame, dht_entrylk_cbk,
- subvol, subvol->fops->entrylk,
- volume, loc, basename, cmd, type, xdata);
+ STACK_WIND(frame, dht_fentrylk_cbk, subvol, subvol->fops->fentrylk, volume,
+ fd, basename, cmd, type, xdata);
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (entrylk, frame, -1, op_errno, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(fentrylk, frame, -1, op_errno, NULL);
- return 0;
+ return 0;
}
+static int32_t
+dht_ipc_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
-int
-dht_fentrylk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
+ GF_VALIDATE_OR_GOTO("dht", frame, out);
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, out);
-{
- DHT_STACK_UNWIND (fentrylk, frame, op_ret, op_errno, NULL);
- return 0;
-}
+ local = frame->local;
+ LOCK(&frame->lock);
+ {
+ if (op_ret < 0 && op_errno != ENOTCONN) {
+ local->op_errno = op_errno;
+ goto unlock;
+ }
+ local->op_ret = 0;
+ }
+unlock:
+ UNLOCK(&frame->lock);
-int
-dht_fentrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
+ this_call_cnt = dht_frame_return(frame);
+ if (is_last_call(this_call_cnt)) {
+ DHT_STACK_UNWIND(ipc, frame, local->op_ret, local->op_errno, NULL);
+ }
+
+out:
+ return 0;
+}
+
+int32_t
+dht_ipc(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- char gfid[GF_UUID_BUF_SIZE] = {0};
+ dht_local_t *local = NULL;
+ int op_errno = EINVAL;
+ dht_conf_t *conf = NULL;
+ int call_cnt = 0;
+ int i = 0;
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
- VALIDATE_OR_GOTO(fd->inode, err);
+ if (op != GF_IPC_TARGET_UPCALL)
+ goto wind_default;
- gf_uuid_unparse(fd->inode->gfid, gfid);
+ VALIDATE_OR_GOTO(this->private, err);
+ conf = this->private;
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
- gf_msg_debug (this->name, 0,
- "No cached subvolume for fd=%p,"
- " gfid = %s", fd, gfid);
- op_errno = EINVAL;
- goto err;
- }
+ local = dht_local_init(frame, NULL, NULL, GF_FOP_IPC);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- STACK_WIND (frame, dht_fentrylk_cbk,
- subvol, subvol->fops->fentrylk,
- volume, fd, basename, cmd, type, xdata);
+ call_cnt = conf->subvolume_cnt;
+ local->call_cnt = call_cnt;
- return 0;
+ if (xdata) {
+ if (dict_set_int8(xdata, conf->xattr_name, 0) < 0)
+ goto err;
+ }
+
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND(frame, dht_ipc_cbk, conf->subvolumes[i],
+ conf->subvolumes[i]->fops->ipc, op, xdata);
+ }
+
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (fentrylk, frame, -1, op_errno, NULL);
+ DHT_STACK_UNWIND(ipc, frame, -1, op_errno, NULL);
- return 0;
-}
+ return 0;
+wind_default:
+ STACK_WIND(frame, default_ipc_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ipc, op, xdata);
+ return 0;
+}
int
-dht_forget (xlator_t *this, inode_t *inode)
+dht_forget(xlator_t *this, inode_t *inode)
{
- uint64_t ctx_int = 0;
- dht_inode_ctx_t *ctx = NULL;
- dht_layout_t *layout = NULL;
+ uint64_t ctx_int = 0;
+ dht_inode_ctx_t *ctx = NULL;
+ dht_layout_t *layout = NULL;
- inode_ctx_del (inode, this, &ctx_int);
+ inode_ctx_del(inode, this, &ctx_int);
- if (!ctx_int)
- return 0;
+ if (!ctx_int)
+ return 0;
- ctx = (dht_inode_ctx_t *) (long) ctx_int;
+ ctx = (dht_inode_ctx_t *)(long)ctx_int;
- layout = ctx->layout;
- ctx->layout = NULL;
- dht_layout_unref (this, layout);
- GF_FREE (ctx);
+ layout = ctx->layout;
+ ctx->layout = NULL;
+ dht_layout_unref(this, layout);
+ GF_FREE(ctx);
- return 0;
+ return 0;
}
-
int
-dht_notify (xlator_t *this, int event, void *data, ...)
-{
- xlator_t *subvol = NULL;
- int cnt = -1;
- int i = -1;
- dht_conf_t *conf = NULL;
- int ret = -1;
- int propagate = 0;
-
- int had_heard_from_all = 0;
- int have_heard_from_all = 0;
- struct timeval time = {0,};
- gf_defrag_info_t *defrag = NULL;
- dict_t *dict = NULL;
- gf_defrag_type cmd = 0;
- dict_t *output = NULL;
- va_list ap;
- dht_methods_t *methods = NULL;
-
- conf = this->private;
- GF_VALIDATE_OR_GOTO (this->name, conf, out);
-
- methods = conf->methods;
- GF_VALIDATE_OR_GOTO (this->name, methods, out);
-
- /* had all subvolumes reported status once till now? */
- had_heard_from_all = 1;
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (!conf->last_event[i]) {
- had_heard_from_all = 0;
- }
- }
-
- switch (event) {
+dht_notify(xlator_t *this, int event, void *data, ...)
+{
+ xlator_t *subvol = NULL;
+ int cnt = -1;
+ int i = -1;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
+ int propagate = 0;
+
+ int had_heard_from_all = 0;
+ int have_heard_from_all = 0;
+ gf_defrag_info_t *defrag = NULL;
+ dict_t *dict = NULL;
+ gf_defrag_type cmd = 0;
+ dict_t *output = NULL;
+ va_list ap;
+ struct gf_upcall *up_data = NULL;
+ struct gf_upcall_cache_invalidation *up_ci = NULL;
+
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, conf, out);
+
+ /* had all subvolumes reported status once till now? */
+ had_heard_from_all = 1;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (!conf->last_event[i]) {
+ had_heard_from_all = 0;
+ }
+ }
+
+ switch (event) {
case GF_EVENT_CHILD_UP:
- subvol = data;
+ subvol = data;
- conf->gen++;
+ conf->gen++;
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (subvol == conf->subvolumes[i]) {
- cnt = i;
- break;
- }
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (subvol == conf->subvolumes[i]) {
+ cnt = i;
+ break;
}
+ }
- if (cnt == -1) {
- gf_msg_debug (this->name, 0,
- "got GF_EVENT_CHILD_UP bad "
- "subvolume %s",
- subvol->name);
- break;
- }
+ if (cnt == -1) {
+ gf_msg_debug(this->name, 0,
+ "got GF_EVENT_CHILD_UP bad "
+ "subvolume %s",
+ subvol->name);
+ break;
+ }
- gettimeofday (&time, NULL);
- LOCK (&conf->subvolume_lock);
- {
- conf->subvolume_status[cnt] = 1;
- conf->last_event[cnt] = event;
- conf->subvol_up_time[cnt] = time.tv_sec;
- }
- UNLOCK (&conf->subvolume_lock);
+ LOCK(&conf->subvolume_lock);
+ {
+ conf->subvolume_status[cnt] = 1;
+ conf->last_event[cnt] = event;
+ conf->subvol_up_time[cnt] = gf_time();
+ }
+ UNLOCK(&conf->subvolume_lock);
- /* one of the node came back up, do a stat update */
- dht_get_du_info_for_subvol (this, cnt);
+ /* one of the node came back up, do a stat update */
+ dht_get_du_info_for_subvol(this, cnt);
- break;
+ break;
- case GF_EVENT_CHILD_MODIFIED:
- subvol = data;
+ case GF_EVENT_SOME_DESCENDENT_UP:
+ subvol = data;
+ conf->gen++;
+ propagate = 1;
- conf->gen++;
- propagate = 1;
+ break;
- break;
+ case GF_EVENT_SOME_DESCENDENT_DOWN:
+ subvol = data;
+ propagate = 1;
- case GF_EVENT_CHILD_DOWN:
- subvol = data;
-
- if (conf->assert_no_child_down) {
- gf_log (this->name, GF_LOG_WARNING,
- "Received CHILD_DOWN. Exiting");
- if (conf->defrag) {
- gf_defrag_stop (conf->defrag,
- GF_DEFRAG_STATUS_FAILED, NULL);
- } else {
- kill (getpid(), SIGTERM);
- }
- }
+ break;
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (subvol == conf->subvolumes[i]) {
- cnt = i;
- break;
- }
- }
+ case GF_EVENT_CHILD_DOWN:
+ subvol = data;
- if (cnt == -1) {
- gf_msg_debug (this->name, 0,
- "got GF_EVENT_CHILD_DOWN bad "
- "subvolume %s", subvol->name);
- break;
+ if (conf->assert_no_child_down) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_CHILD_DOWN,
+ "Received CHILD_DOWN. Exiting");
+ if (conf->defrag) {
+ gf_defrag_stop(conf, GF_DEFRAG_STATUS_FAILED, NULL);
+ } else {
+ kill(getpid(), SIGTERM);
}
+ }
- LOCK (&conf->subvolume_lock);
- {
- conf->subvolume_status[cnt] = 0;
- conf->last_event[cnt] = event;
- conf->subvol_up_time[cnt] = 0;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (subvol == conf->subvolumes[i]) {
+ cnt = i;
+ break;
}
- UNLOCK (&conf->subvolume_lock);
+ }
- for (i = 0; i < conf->subvolume_cnt; i++)
- if (conf->last_event[i] != event)
- event = GF_EVENT_CHILD_MODIFIED;
+ if (cnt == -1) {
+ gf_msg_debug(this->name, 0,
+ "got GF_EVENT_CHILD_DOWN bad "
+ "subvolume %s",
+ subvol->name);
break;
+ }
- case GF_EVENT_CHILD_CONNECTING:
- subvol = data;
+ LOCK(&conf->subvolume_lock);
+ {
+ conf->subvolume_status[cnt] = 0;
+ conf->last_event[cnt] = event;
+ conf->subvol_up_time[cnt] = 0;
+ }
+ UNLOCK(&conf->subvolume_lock);
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (subvol == conf->subvolumes[i]) {
- cnt = i;
- break;
- }
- }
+ for (i = 0; i < conf->subvolume_cnt; i++)
+ if (conf->last_event[i] != event)
+ event = GF_EVENT_SOME_DESCENDENT_DOWN;
+ break;
- if (cnt == -1) {
- gf_msg_debug (this->name, 0,
- "got GF_EVENT_CHILD_CONNECTING"
- " bad subvolume %s",
- subvol->name);
- break;
- }
+ case GF_EVENT_CHILD_CONNECTING:
+ subvol = data;
- LOCK (&conf->subvolume_lock);
- {
- conf->last_event[cnt] = event;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (subvol == conf->subvolumes[i]) {
+ cnt = i;
+ break;
}
- UNLOCK (&conf->subvolume_lock);
+ }
+ if (cnt == -1) {
+ gf_msg_debug(this->name, 0,
+ "got GF_EVENT_CHILD_CONNECTING"
+ " bad subvolume %s",
+ subvol->name);
break;
- case GF_EVENT_VOLUME_DEFRAG:
- {
- if (!conf->defrag) {
- return ret;
- }
- defrag = conf->defrag;
+ }
- dict = data;
- va_start (ap, data);
- output = va_arg (ap, dict_t*);
+ LOCK(&conf->subvolume_lock);
+ {
+ conf->last_event[cnt] = event;
+ }
+ UNLOCK(&conf->subvolume_lock);
- ret = dict_get_int32 (dict, "rebalance-command",
- (int32_t*)&cmd);
- if (ret)
- return ret;
- LOCK (&defrag->lock);
- {
- if (defrag->is_exiting)
- goto unlock;
- if (cmd == GF_DEFRAG_CMD_STATUS)
- gf_defrag_status_get (defrag, output);
- else if (cmd == GF_DEFRAG_CMD_START_DETACH_TIER)
- gf_defrag_start_detach_tier(defrag);
- else if (cmd == GF_DEFRAG_CMD_STOP)
- gf_defrag_stop (defrag,
- GF_DEFRAG_STATUS_STOPPED, output);
- }
-unlock:
- UNLOCK (&defrag->lock);
- return 0;
- break;
- }
+ break;
+ case GF_EVENT_VOLUME_DEFRAG: {
+ if (!conf->defrag) {
+ return ret;
+ }
+ defrag = conf->defrag;
- default:
- propagate = 1;
- break;
- }
+ dict = data;
+ va_start(ap, data);
+ output = va_arg(ap, dict_t *);
+ ret = dict_get_int32(dict, "rebalance-command", (int32_t *)&cmd);
+ if (ret) {
+ va_end(ap);
+ return ret;
+ }
+ LOCK(&defrag->lock);
+ {
+ if (defrag->is_exiting)
+ goto unlock;
+ if ((cmd == GF_DEFRAG_CMD_STATUS) ||
+ (cmd == GF_DEFRAG_CMD_DETACH_STATUS))
+ gf_defrag_status_get(conf, output);
+ else if (cmd == GF_DEFRAG_CMD_DETACH_START)
+ defrag->cmd = GF_DEFRAG_CMD_DETACH_START;
+ else if (cmd == GF_DEFRAG_CMD_STOP ||
+ cmd == GF_DEFRAG_CMD_DETACH_STOP)
+ gf_defrag_stop(conf, GF_DEFRAG_STATUS_STOPPED, output);
+ }
+ unlock:
+ UNLOCK(&defrag->lock);
+ va_end(ap);
+ return ret;
+ break;
+ }
+ case GF_EVENT_UPCALL:
+ up_data = (struct gf_upcall *)data;
+ if (up_data->event_type != GF_UPCALL_CACHE_INVALIDATION)
+ break;
+ up_ci = (struct gf_upcall_cache_invalidation *)up_data->data;
+
+ /* Since md-cache will be aggressively filtering lookups,
+ * the stale layout issue will be more pronounced. Hence
+ * when a layout xattr is changed by the rebalance process
+ * notify all the md-cache clients to invalidate the existing
+ * stat cache and send the lookup next time*/
+ if (up_ci->dict && dict_get(up_ci->dict, conf->xattr_name))
+ up_ci->flags |= UP_EXPLICIT_LOOKUP;
+
+ /* TODO: Instead of invalidating iatt, update the new
+ * hashed/cached subvolume in dht inode_ctx */
+ if (IS_DHT_LINKFILE_MODE(&up_ci->stat))
+ up_ci->flags |= UP_EXPLICIT_LOOKUP;
+
+ propagate = 1;
+ break;
+ default:
+ propagate = 1;
+ break;
+ }
+
+ /* have all subvolumes reported status once by now? */
+ have_heard_from_all = 1;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (!conf->last_event[i])
+ have_heard_from_all = 0;
+ }
+
+ /* if all subvols have reported status, no need to hide anything
+ or wait for anything else. Just propagate blindly */
+ if (have_heard_from_all) {
+ propagate = 1;
+ }
+
+ if (!had_heard_from_all && have_heard_from_all) {
+ static int run_defrag = 0;
+ /* This is the first event which completes aggregation
+ of events from all subvolumes. If at least one subvol
+ had come up, propagate CHILD_UP, but only this time
+ */
+ event = GF_EVENT_CHILD_DOWN;
- /* have all subvolumes reported status once by now? */
- have_heard_from_all = 1;
for (i = 0; i < conf->subvolume_cnt; i++) {
- if (!conf->last_event[i])
- have_heard_from_all = 0;
- }
-
- /* if all subvols have reported status, no need to hide anything
- or wait for anything else. Just propagate blindly */
- if (have_heard_from_all) {
- propagate = 1;
-
- }
-
-
- if (!had_heard_from_all && have_heard_from_all) {
- /* This is the first event which completes aggregation
- of events from all subvolumes. If at least one subvol
- had come up, propagate CHILD_UP, but only this time
- */
- event = GF_EVENT_CHILD_DOWN;
-
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (conf->last_event[i] == GF_EVENT_CHILD_UP) {
- event = GF_EVENT_CHILD_UP;
- break;
- }
-
- if (conf->last_event[i] == GF_EVENT_CHILD_CONNECTING) {
- event = GF_EVENT_CHILD_CONNECTING;
- /* continue to check other events for CHILD_UP */
- }
- }
+ if (conf->last_event[i] == GF_EVENT_CHILD_UP) {
+ event = GF_EVENT_CHILD_UP;
+ break;
+ }
- /* rebalance is started with assert_no_child_down. So we do
- * not need to handle CHILD_DOWN event here.
- */
- if (conf->defrag) {
- if (methods->migration_needed(this)) {
- ret = gf_thread_create(&conf->defrag->th,
- NULL,
- gf_defrag_start, this);
- if (ret) {
- GF_FREE (conf->defrag);
- conf->defrag = NULL;
- kill (getpid(), SIGTERM);
- }
- } else {
- GF_FREE (conf->defrag);
- conf->defrag = NULL;
- }
- }
+ if (conf->last_event[i] == GF_EVENT_CHILD_CONNECTING) {
+ event = GF_EVENT_CHILD_CONNECTING;
+ /* continue to check other events for CHILD_UP */
+ }
}
- ret = 0;
- if (propagate)
- ret = default_notify (this, event, data);
+ /* Rebalance is started with assert_no_child_down. So we do
+ * not need to handle CHILD_DOWN event here.
+ *
+ * If there is a graph switch, we should not restart the
+ * rebalance daemon. Use 'run_defrag' to indicate if the
+ * thread has already started.
+ */
+ if (conf->defrag && !run_defrag) {
+ run_defrag = 1;
+ ret = gf_thread_create(&conf->defrag->th, NULL, gf_defrag_start,
+ this, "dhtdg");
+ if (ret) {
+ GF_FREE(conf->defrag);
+ conf->defrag = NULL;
+ kill(getpid(), SIGTERM);
+ }
+ }
+ }
+
+ ret = 0;
+ if (propagate)
+ ret = default_notify(this, event, data);
out:
- return ret;
+ return ret;
}
int
-dht_inode_ctx_layout_get (inode_t *inode, xlator_t *this, dht_layout_t **layout)
+dht_inode_ctx_layout_get(inode_t *inode, xlator_t *this, dht_layout_t **layout)
{
- dht_inode_ctx_t *ctx = NULL;
- int ret = -1;
+ dht_inode_ctx_t *ctx = NULL;
+ int ret = -1;
- ret = dht_inode_ctx_get (inode, this, &ctx);
+ ret = dht_inode_ctx_get(inode, this, &ctx);
- if (!ret && ctx) {
- if (ctx->layout) {
- if (layout)
- *layout = ctx->layout;
- ret = 0;
- } else {
- ret = -1;
- }
+ if (!ret && ctx) {
+ if (ctx->layout) {
+ if (layout)
+ *layout = ctx->layout;
+ ret = 0;
+ } else {
+ ret = -1;
}
+ }
- return ret;
+ return ret;
}
void
-dht_log_new_layout_for_dir_selfheal (xlator_t *this, loc_t *loc,
- dht_layout_t *layout)
+dht_log_new_layout_for_dir_selfheal(xlator_t *this, loc_t *loc,
+ dht_layout_t *layout)
{
+ char string[2048] = {0};
+ char *output_string = NULL;
+ int len = 0;
+ int off = 0;
+ int i = 0;
+ gf_loglevel_t log_level = gf_log_get_loglevel();
+ int ret = 0;
+
+ if (log_level < GF_LOG_INFO)
+ return;
- char string[2048] = {0};
- char *output_string = NULL;
- int len = 0;
- int off = 0;
- int i = 0;
- gf_loglevel_t log_level = gf_log_get_loglevel();
- int ret = 0;
- int max_string_len = 0;
-
- if (log_level < GF_LOG_INFO)
- return;
+ if (!layout)
+ return;
- if (!layout)
- return;
+ if (!layout->cnt)
+ return;
- if (!layout->cnt)
- return;
+ if (!loc)
+ return;
- if (!loc)
- return;
+ if (!loc->path)
+ return;
- if (!loc->path)
- return;
+ ret = snprintf(string, sizeof(string), "Setting layout of %s with ",
+ loc->path);
- max_string_len = sizeof (string);
+ if (ret < 0)
+ return;
- ret = snprintf (string, max_string_len, "Setting layout of %s with ",
- loc->path);
+ len += ret;
+
+ /* Calculation of total length of the string required to calloc
+ * output_string. Log includes subvolume-name, start-range, end-range
+ * and err value.
+ *
+ * This log will help to debug cases where:
+ * a) Different processes set different layout of a directory.
+ * b) Error captured in lookup, which will be filled in layout->err
+ * (like ENOENT, ESTALE etc)
+ */
+
+ for (i = 0; i < layout->cnt; i++) {
+ ret = snprintf(string, sizeof(string),
+ "[Subvol_name: %s, Err: %d , Start: "
+ "0x%x, Stop: 0x%x, Hash: 0x%x], ",
+ layout->list[i].xlator->name, layout->list[i].err,
+ layout->list[i].start, layout->list[i].stop,
+ layout->list[i].commit_hash);
if (ret < 0)
- return;
+ return;
len += ret;
+ }
- /* Calculation of total length of the string required to calloc
- * output_string. Log includes subvolume-name, start-range, end-range and
- * err value.
- *
- * This log will help to debug cases where:
- * a) Different processes set different layout of a directory.
- * b) Error captured in lookup, which will be filled in layout->err
- * (like ENOENT, ESTALE etc)
- */
+ len++;
- for (i = 0; i < layout->cnt; i++) {
+ output_string = GF_MALLOC(len + 1, gf_common_mt_char);
- ret = snprintf (string, max_string_len,
- "[Subvol_name: %s, Err: %d , Start: "
- "%"PRIu32 " , Stop: %"PRIu32 " ], ",
- layout->list[i].xlator->name,
- layout->list[i].err, layout->list[i].start,
- layout->list[i].stop);
+ if (!output_string)
+ return;
- if (ret < 0)
- return;
+ ret = snprintf(output_string, len + 1, "Setting layout of %s with ",
+ loc->path);
- len += ret;
+ if (ret < 0)
+ goto err;
- }
+ off += ret;
- len++;
+ for (i = 0; i < layout->cnt; i++) {
+ ret = snprintf(output_string + off, len - off,
+ "[Subvol_name: %s, Err: %d , Start: "
+ "0x%x, Stop: 0x%x, Hash: 0x%x], ",
+ layout->list[i].xlator->name, layout->list[i].err,
+ layout->list[i].start, layout->list[i].stop,
+ layout->list[i].commit_hash);
- output_string = GF_CALLOC (len, sizeof (char), gf_common_mt_char);
+ if (ret < 0)
+ goto err;
- if (!output_string)
- return;
+ off += ret;
+ }
- ret = snprintf (output_string, len, "Setting layout of %s with ",
- loc->path);
+ gf_msg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_LOG_FIXED_LAYOUT, "%s",
+ output_string);
- if (ret < 0)
- goto err;
+err:
+ GF_FREE(output_string);
+}
- off += ret;
+int32_t
+dht_migration_get_dst_subvol(xlator_t *this, dht_local_t *local)
+{
+ int ret = -1;
+ if (!local)
+ goto out;
- for (i = 0; i < layout->cnt; i++) {
+ local->rebalance.target_node = dht_subvol_get_hashed(this, &local->loc);
+
+ if (local->rebalance.target_node)
+ ret = 0;
+
+out:
+ return ret;
+}
+
+/*
+This function should not be called more then once during a FOP
+handling path. It is valid only for for ops on files
+*/
+int32_t
+dht_set_local_rebalance(xlator_t *this, dht_local_t *local, struct iatt *stbuf,
+ struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
+{
+ if (!local)
+ return -1;
- ret = snprintf (output_string + off, len - off,
- "[Subvol_name: %s, Err: %d , Start: "
- "%"PRIu32 " , Stop: %"PRIu32 " ], ",
- layout->list[i].xlator->name,
- layout->list[i].err, layout->list[i].start,
- layout->list[i].stop);
+ if (local->rebalance.set) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_REBAL_STRUCT_SET,
+ "local->rebalance already set");
+ }
- if (ret < 0)
- goto err;
+ if (stbuf)
+ memcpy(&local->rebalance.stbuf, stbuf, sizeof(struct iatt));
- off += ret;
+ if (prebuf)
+ memcpy(&local->rebalance.prebuf, prebuf, sizeof(struct iatt));
- }
+ if (postbuf)
+ memcpy(&local->rebalance.postbuf, postbuf, sizeof(struct iatt));
- gf_msg (this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_FIXED_LAYOUT,
- "%s", output_string);
+ if (xdata)
+ local->rebalance.xdata = dict_ref(xdata);
+
+ local->rebalance.set = 1;
+
+ return 0;
+}
+
+int32_t
+dht_release(xlator_t *this, fd_t *fd)
+{
+ return dht_fd_ctx_destroy(this, fd);
+}
+
+static int
+dht_pt_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (!op_ret) {
+ dht_layout_set(this, inode, local->layout);
+ }
+
+ DHT_STACK_UNWIND(mkdir, frame, op_ret, op_errno, inode, stbuf, preparent,
+ postparent, NULL);
+
+ return 0;
+}
+
+int32_t
+dht_pt_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
+{
+ dht_layout_t *layout = NULL;
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ bool free_xdata = false;
+ int ret = 0;
+ int op_errno = 0;
+ int32_t *disk_layout_p = NULL;
+
+ conf = this->private;
+
+ local = dht_local_init(frame, loc, NULL, GF_FOP_MKDIR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ layout = dht_layout_new(this, conf->subvolume_cnt);
+ if (!layout)
+ goto wind;
+
+ local->layout = layout;
+
+ if (!xdata) {
+ xdata = dict_new();
+ if (!xdata)
+ goto wind;
+ free_xdata = true;
+ }
+
+ /*Set the xlator or the following will crash*/
+ layout->list[0].xlator = conf->subvolumes[0];
+
+ dht_selfheal_layout_new_directory(frame, loc, layout);
+
+ dht_disk_layout_extract(this, layout, 0, &disk_layout_p);
+
+ ret = dict_set_bin(xdata, conf->xattr_name, disk_layout_p, 4 * 4);
+ if (ret) {
+ gf_msg("dht", GF_LOG_DEBUG, EINVAL, DHT_MSG_DICT_SET_FAILED,
+ "dht layout dict set failed");
+ }
+wind:
+ STACK_WIND(frame, dht_pt_mkdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata);
+ if (free_xdata)
+ dict_unref(xdata);
+ return 0;
err:
- GF_FREE (output_string);
+ op_errno = local ? local->op_errno : op_errno;
+ DHT_STACK_UNWIND(mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
+
+ return 0;
}
-int32_t dht_migration_get_dst_subvol(xlator_t *this, dht_local_t *local)
+static int
+dht_pt_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
{
- int ret = -1;
+ dht_conf_t *conf = NULL;
- if (!local)
- goto out;
+ conf = this->private;
+ dict_del(xattr, conf->xattr_name);
+ dict_del(xattr, conf->mds_xattr_key);
+ dict_del(xattr, conf->commithash_xattr_name);
- local->rebalance.target_node =
- dht_subvol_get_hashed (this, &local->loc);
+ if (frame->root->pid >= 0) {
+ GF_REMOVE_INTERNAL_XATTR("trusted.glusterfs.quota*", xattr);
+ GF_REMOVE_INTERNAL_XATTR("trusted.pgfid*", xattr);
+ }
- if (local->rebalance.target_node)
- ret = 0;
+ DHT_STACK_UNWIND(getxattr, frame, op_ret, op_errno, xattr, xdata);
+ return 0;
+}
-out:
- return ret;
+int
+dht_pt_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *key, dict_t *xdata)
+{
+ STACK_WIND(frame, dht_pt_getxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr, loc, key, xdata);
+ return 0;
}
-int32_t dht_migration_needed(xlator_t *this)
+static int
+dht_pt_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
{
- gf_defrag_info_t *defrag = NULL;
- dht_conf_t *conf = NULL;
- int ret = 0;
+ dht_conf_t *conf = NULL;
- conf = this->private;
+ conf = this->private;
+ dict_del(xattr, conf->xattr_name);
- GF_VALIDATE_OR_GOTO ("dht", conf, out);
- GF_VALIDATE_OR_GOTO ("dht", conf->defrag, out);
+ if (frame->root->pid >= 0) {
+ GF_REMOVE_INTERNAL_XATTR("trusted.glusterfs.quota*", xattr);
+ GF_REMOVE_INTERNAL_XATTR("trusted.pgfid*", xattr);
+ }
- defrag = conf->defrag;
+ DHT_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, xattr, xdata);
+ return 0;
+}
- if (defrag->cmd != GF_DEFRAG_CMD_START_TIER)
- ret = 1;
+int
+dht_pt_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *key,
+ dict_t *xdata)
+{
+ STACK_WIND(frame, dht_pt_fgetxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr, fd, key, xdata);
+ return 0;
+}
-out:
- return ret;
+/* The job of this function is to check if all the xlators have updated
+ * error in the layout. */
+int
+dht_dir_layout_error_check(xlator_t *this, inode_t *inode)
+{
+ dht_layout_t *layout = NULL;
+ int i = 0;
+
+ layout = dht_layout_get(this, inode);
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].err == 0) {
+ return 0;
+ }
+ }
+
+ /* Returning the first xlator error as all xlators have errors */
+ return layout->list[0].err;
}
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index 43f7c0264f5..fe0dc3db34a 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -8,99 +8,149 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include <regex.h>
-#include <signal.h>
#include "dht-mem-types.h"
#include "dht-messages.h"
+#include <glusterfs/call-stub.h>
#include "libxlator.h"
-#include "syncop.h"
+#include <glusterfs/syncop.h>
+#include <glusterfs/refcount.h>
+#include <glusterfs/timer.h>
+#include "protocol-common.h"
+#include <glusterfs/glusterfs-acl.h>
#ifndef _DHT_H
#define _DHT_H
-#define GF_XATTR_FIX_LAYOUT_KEY "distribute.fix.layout"
-#define GF_XATTR_FILE_MIGRATE_KEY "trusted.distribute.migrate-data"
-#define GF_DHT_LOOKUP_UNHASHED_ON 1
+#define GF_XATTR_FIX_LAYOUT_KEY "distribute.fix.layout"
+#define GF_XATTR_FILE_MIGRATE_KEY "trusted.distribute.migrate-data"
+#define DHT_MDS_STR "mds"
+#define GF_DHT_LOOKUP_UNHASHED_OFF 0
+#define GF_DHT_LOOKUP_UNHASHED_ON 1
#define GF_DHT_LOOKUP_UNHASHED_AUTO 2
-#define DHT_PATHINFO_HEADER "DISTRIBUTE:"
-#define DHT_FILE_MIGRATE_DOMAIN "dht.file.migrate"
-#define DHT_LAYOUT_HEAL_DOMAIN "dht.layout.heal"
+#define DHT_PATHINFO_HEADER "DISTRIBUTE:"
+#define DHT_FILE_MIGRATE_DOMAIN "dht.file.migrate"
+/* Layout synchronization */
+#define DHT_LAYOUT_HEAL_DOMAIN "dht.layout.heal"
+/* Namespace synchronization */
+#define DHT_ENTRY_SYNC_DOMAIN "dht.entry.sync"
+#define DHT_LAYOUT_HASH_INVALID 1
+#define MAX_REBAL_THREADS sysconf(_SC_NPROCESSORS_ONLN)
+
+#define DHT_DIR_STAT_BLOCKS 8
+#define DHT_DIR_STAT_SIZE 4096
+
+/* Virtual xattr for subvols status */
+
+#define DHT_SUBVOL_STATUS_KEY "dht.subvol.status"
+
+/* Virtual xattrs for debugging */
-#include <fnmatch.h>
+#define DHT_DBG_HASHED_SUBVOL_PATTERN "dht.file.hashed-subvol.*"
+#define DHT_DBG_HASHED_SUBVOL_KEY "dht.file.hashed-subvol."
-typedef int (*dht_selfheal_dir_cbk_t) (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- dict_t *xdata);
-typedef int (*dht_defrag_cbk_fn_t) (xlator_t *this, call_frame_t *frame,
- int ret);
+/* Rebalance nodeuuid flags */
+#define REBAL_NODEUUID_MINE 0x01
+typedef int (*dht_selfheal_dir_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+typedef int (*dht_defrag_cbk_fn_t)(xlator_t *this, xlator_t *dst_node,
+ call_frame_t *frame, int ret);
+
+typedef int (*dht_refresh_layout_unlock)(call_frame_t *frame, xlator_t *this,
+ int op_ret, int invoke_cbk);
+
+typedef int (*dht_refresh_layout_done_handle)(call_frame_t *frame);
struct dht_layout {
- int spread_cnt; /* layout spread count per directory,
- is controlled by 'setxattr()' with
- special key */
- int cnt;
- int preset;
- int gen;
- int type;
- int ref; /* use with dht_conf_t->layout_lock */
- gf_boolean_t search_unhashed;
- struct {
- int err; /* 0 = normal
- -1 = dir exists and no xattr
- >0 = dir lookup failed with errno
- */
- uint32_t start;
- uint32_t stop;
- xlator_t *xlator;
- } list[];
+ int spread_cnt; /* layout spread count per directory,
+ is controlled by 'setxattr()' with
+ special key */
+ int cnt;
+ int preset;
+ /*
+ * The last *configuration* state for which this directory was known
+ * to be in balance. The corresponding vol_commit_hash changes
+ * whenever bricks are added or removed. This value changes when a
+ * (full) rebalance is complete. If they match, it's safe to assume
+ * that every file is where it should be and there's no need to do
+ * lookups for files elsewhere. If they don't, then we have to do a
+ * global lookup to be sure.
+ */
+ uint32_t commit_hash;
+ /*
+ * The *runtime* state of the volume, changes when connections to
+ * bricks are made or lost.
+ */
+ int gen;
+ int type;
+ gf_atomic_t ref; /* use with dht_conf_t->layout_lock */
+ uint32_t search_unhashed;
+ struct {
+ int err; /* 0 = normal
+ -1 = dir exists and no xattr
+ >0 = dir lookup failed with errno
+ */
+ uint32_t start;
+ uint32_t stop;
+ uint32_t commit_hash;
+ xlator_t *xlator;
+ } list[];
};
-typedef struct dht_layout dht_layout_t;
+typedef struct dht_layout dht_layout_t;
struct dht_stat_time {
- uint32_t atime;
- uint32_t atime_nsec;
- uint32_t ctime;
- uint32_t ctime_nsec;
- uint32_t mtime;
- uint32_t mtime_nsec;
+ uint32_t atime;
+ uint32_t atime_nsec;
+ uint32_t ctime;
+ uint32_t ctime_nsec;
+ uint32_t mtime;
+ uint32_t mtime_nsec;
};
typedef struct dht_stat_time dht_stat_time_t;
struct dht_inode_ctx {
- dht_layout_t *layout;
- dht_stat_time_t time;
+ dht_layout_t *layout;
+ dht_stat_time_t time;
+ xlator_t *lock_subvol;
+ xlator_t *mds_subvol; /* This is only used for directories */
};
typedef struct dht_inode_ctx dht_inode_ctx_t;
-
typedef enum {
- DHT_HASH_TYPE_DM,
- DHT_HASH_TYPE_DM_USER,
+ DHT_HASH_TYPE_DM,
+ DHT_HASH_TYPE_DM_USER,
} dht_hashfn_type_t;
+typedef enum {
+ DHT_INODELK,
+ DHT_ENTRYLK,
+} dht_lock_type_t;
+
/* rebalance related */
struct dht_rebalance_ {
- xlator_t *from_subvol;
- xlator_t *target_node;
- off_t offset;
- size_t size;
- int32_t flags;
- int count;
- struct iobref *iobref;
- struct iovec *vector;
- struct iatt stbuf;
- dht_defrag_cbk_fn_t target_op_fn;
- dict_t *xdata;
+ xlator_t *from_subvol;
+ xlator_t *target_node;
+ off_t offset;
+ size_t size;
+ int32_t flags;
+ int count;
+ struct iobref *iobref;
+ struct iovec *vector;
+ struct iatt stbuf;
+ struct iatt prebuf;
+ struct iatt postbuf;
+ dht_defrag_cbk_fn_t target_op_fn;
+ dict_t *xdata;
+ dict_t *xattr;
+ dict_t *dict;
+ struct gf_flock flock;
+ int32_t set;
+ int lock_cmd;
};
/**
@@ -108,850 +158,1227 @@ struct dht_rebalance_ {
* events
**/
typedef enum {
- qdstatfs_action_OFF = 0,
- qdstatfs_action_REPLACE,
- qdstatfs_action_NEGLECT,
- qdstatfs_action_COMPARE,
+ qdstatfs_action_OFF = 0,
+ qdstatfs_action_REPLACE,
+ qdstatfs_action_NEGLECT,
+ qdstatfs_action_COMPARE,
} qdstatfs_action_t;
-struct dht_skip_linkto_unlink {
+typedef enum {
+ REACTION_INVALID,
+ FAIL_ON_ANY_ERROR,
+ IGNORE_ENOENT_ESTALE,
+ IGNORE_ENOENT_ESTALE_EIO,
+} dht_reaction_type_t;
- gf_boolean_t handle_valid_link;
- int opend_fd_count;
- xlator_t *hash_links_to;
- uuid_t cached_gfid;
- uuid_t hashed_gfid;
+struct dht_skip_linkto_unlink {
+ xlator_t *hash_links_to;
+ uuid_t cached_gfid;
+ uuid_t hashed_gfid;
+ int opend_fd_count;
+ gf_boolean_t handle_valid_link;
};
typedef struct {
- xlator_t *xl;
- loc_t loc; /* contains/points to inode to lock on. */
- short type; /* read/write lock. */
- char *domain; /* Only locks within a single domain
- * contend with each other
- */
- gf_lkowner_t lk_owner;
- gf_boolean_t locked;
+ xlator_t *xl;
+ loc_t loc; /* contains/points to inode to lock on. */
+ char *domain; /* Only locks within a single domain
+ * contend with each other
+ */
+ char *basename; /* Required for entrylk */
+ gf_boolean_t locked;
+ dht_reaction_type_t do_on_failure;
+ short type; /* read/write lock. */
+ gf_lkowner_t lk_owner;
} dht_lock_t;
-typedef
-int (*dht_selfheal_layout_t)(call_frame_t *frame, loc_t *loc,
- dht_layout_t *layout);
-
-typedef
-gf_boolean_t (*dht_need_heal_t)(call_frame_t *frame, dht_layout_t **inmem,
- dht_layout_t **ondisk);
-
-struct dht_local {
- int call_cnt;
- loc_t loc;
- loc_t loc2;
- int op_ret;
- int op_errno;
- int layout_mismatch;
- /* Use stbuf as the postbuf, when we require both
- * pre and post attrs */
- struct iatt stbuf;
- struct iatt prebuf;
- struct iatt preoldparent;
- struct iatt postoldparent;
- struct iatt preparent;
- struct iatt postparent;
- struct statvfs statvfs;
- fd_t *fd;
- inode_t *inode;
- dict_t *params;
- dict_t *xattr;
- dict_t *xattr_req;
- dht_layout_t *layout;
- size_t size;
- ino_t ia_ino;
- xlator_t *src_hashed, *src_cached;
- xlator_t *dst_hashed, *dst_cached;
- xlator_t *cached_subvol;
- xlator_t *hashed_subvol;
- char need_selfheal;
- int file_count;
- int dir_count;
- call_frame_t *main_frame;
- int fop_succeeded;
- struct {
- fop_mknod_cbk_t linkfile_cbk;
- struct iatt stbuf;
- loc_t loc;
- inode_t *inode;
- dict_t *xattr;
- xlator_t *srcvol;
- } linkfile;
- struct {
- uint32_t hole_cnt;
- uint32_t overlaps_cnt;
- uint32_t down;
- uint32_t misc;
- dht_selfheal_dir_cbk_t dir_cbk;
- dht_selfheal_layout_t healer;
- dht_need_heal_t should_heal;
- gf_boolean_t force_mkdir;
- dht_layout_t *layout, *refreshed_layout;
- } selfheal;
- uint32_t uid;
- uint32_t gid;
-
- /* needed by nufa */
- int32_t flags;
- mode_t mode;
- dev_t rdev;
- mode_t umask;
-
- /* need for file-info */
- char *xattr_val;
- char *key;
-
- /* which xattr request? */
- char xsel[256];
- int32_t alloc_len;
-
- char *newpath;
-
- /* gfid related */
- uuid_t gfid;
-
- /* flag used to make sure we need to return estale in
- {lookup,revalidate}_cbk */
- char return_estale;
- char need_lookup_everywhere;
-
- glusterfs_fop_t fop;
-
- gf_boolean_t linked;
- xlator_t *link_subvol;
-
- struct dht_rebalance_ rebalance;
- xlator_t *first_up_subvol;
+/* The lock structure represents inodelk. */
+typedef struct {
+ fop_inodelk_cbk_t inodelk_cbk;
+ dht_lock_t **locks;
+ int lk_count;
+ dht_reaction_type_t reaction;
- gf_boolean_t quota_deem_statfs;
+ /* whether locking failed on _any_ of the "locks" above */
+ int op_ret;
+ int op_errno;
+} dht_ilock_wrap_t;
- gf_boolean_t added_link;
- gf_boolean_t is_linkfile;
+/* The lock structure represents entrylk. */
+typedef struct {
+ fop_entrylk_cbk_t entrylk_cbk;
+ dht_lock_t **locks;
+ int lk_count;
+ dht_reaction_type_t reaction;
+
+ /* whether locking failed on _any_ of the "locks" above */
+ int op_ret;
+ int op_errno;
+} dht_elock_wrap_t;
+
+/* The first member of dht_dir_transaction_t should be of type dht_ilock_wrap_t.
+ * Otherwise it can result in subtle memory corruption issues as in most of the
+ * places we use lock[0].layout.my_layout or lock[0].layout.parent_layout and
+ * lock[0].ns.parent_layout (like in dht_local_wipe).
+ */
+typedef union {
+ union {
+ dht_ilock_wrap_t my_layout;
+ dht_ilock_wrap_t parent_layout;
+ } layout;
+ struct dht_namespace {
+ dht_ilock_wrap_t parent_layout;
+ dht_elock_wrap_t directory_ns;
+ fop_entrylk_cbk_t ns_cbk;
+ } ns;
+} dht_dir_transaction_t;
+
+typedef int (*dht_selfheal_layout_t)(call_frame_t *frame, loc_t *loc,
+ dht_layout_t *layout);
- struct dht_skip_linkto_unlink skip_unlink;
+typedef gf_boolean_t (*dht_need_heal_t)(call_frame_t *frame,
+ dht_layout_t **inmem,
+ dht_layout_t **ondisk);
- struct {
- fop_inodelk_cbk_t inodelk_cbk;
- dht_lock_t **locks;
- int lk_count;
-
- /* whether locking failed on _any_ of the "locks" above */
- int op_ret;
- int op_errno;
- } lock;
+struct dht_local {
+ loc_t loc;
+ loc_t loc2;
+ int call_cnt;
+ int op_ret;
+ int op_errno;
+ int layout_mismatch;
+ /* Use stbuf as the postbuf, when we require both
+ * pre and post attrs */
+ struct iatt stbuf;
+ struct iatt mds_stbuf;
+ struct iatt prebuf;
+ struct iatt preoldparent;
+ struct iatt postoldparent;
+ struct iatt preparent;
+ struct iatt postparent;
+ struct statvfs statvfs;
+ fd_t *fd;
+ inode_t *inode;
+ dict_t *params;
+ dict_t *xattr;
+ dict_t *mds_xattr;
+ dict_t *xdata; /* dict used to save xdata response by xattr fop */
+ dict_t *xattr_req;
+ dht_layout_t *layout;
+ size_t size;
+ ino_t ia_ino;
+ xlator_t *src_hashed, *src_cached;
+ xlator_t *dst_hashed, *dst_cached;
+ xlator_t *cached_subvol;
+ xlator_t *hashed_subvol;
+ xlator_t *mds_subvol; /* This is use for dir only */
+ int file_count;
+ int dir_count;
+ call_frame_t *main_frame;
+ int fop_succeeded;
+ struct {
+ fop_mknod_cbk_t linkfile_cbk;
+ struct iatt stbuf;
+ loc_t loc;
+ inode_t *inode;
+ dict_t *xattr;
+ xlator_t *srcvol;
+ } linkfile;
+ struct {
+ uint32_t hole_cnt;
+ uint32_t overlaps_cnt;
+ uint32_t down;
+ uint32_t misc;
+ dht_selfheal_dir_cbk_t dir_cbk;
+ dht_selfheal_layout_t healer;
+ dht_need_heal_t should_heal;
+ dht_layout_t *layout, *refreshed_layout;
+ uint32_t missing_cnt;
+ gf_boolean_t force_mkdir;
+ } selfheal;
+
+ dht_refresh_layout_unlock refresh_layout_unlock;
+ dht_refresh_layout_done_handle refresh_layout_done;
+
+ uint32_t uid;
+ uint32_t gid;
+ pid_t pid;
+
+ glusterfs_fop_t fop;
+
+ /* need for file-info */
+ char *xattr_val;
+ char *key;
+
+ /* needed by nufa */
+ int32_t flags;
+ mode_t mode;
+ dev_t rdev;
+ mode_t umask;
+
+ /* which xattr request? */
+ char xsel[256];
+ int32_t alloc_len;
+
+ /* gfid related */
+ uuid_t gfid;
+ uuid_t gfid_req;
+
+ xlator_t *link_subvol;
+
+ struct dht_rebalance_ rebalance;
+ xlator_t *first_up_subvol;
+
+ struct dht_skip_linkto_unlink skip_unlink;
+
+ dht_dir_transaction_t lock[2], *current;
+
+ /* inodelks during filerename for backward compatibility */
+ dht_lock_t **rename_inodelk_backward_compatible;
+
+ call_stub_t *stub;
+ int32_t parent_disk_layout[4];
+
+ /* rename rollback */
+ int *ret_cache;
+
+ loc_t loc2_copy;
+
+ int rename_inodelk_bc_count;
+ /* This is use only for directory operation */
+ int32_t valid;
+ int32_t mds_heal_fresh_lookup;
+ short lock_type;
+ char need_selfheal;
+ char need_xattr_heal;
+ char need_attrheal;
+ /* flag used to make sure we need to return estale in
+ {lookup,revalidate}_cbk */
+ char return_estale;
+ char need_lookup_everywhere;
+ /* fd open check */
+ gf_boolean_t fd_checked;
+ gf_boolean_t linked;
+ gf_boolean_t added_link;
+ gf_boolean_t is_linkfile;
+ gf_boolean_t quota_deem_statfs;
+ gf_boolean_t heal_layout;
+ gf_boolean_t locked;
+ gf_boolean_t dont_create_linkto;
+ gf_boolean_t gfid_missing;
};
typedef struct dht_local dht_local_t;
/* du - disk-usage */
struct dht_du {
- double avail_percent;
- double avail_inodes;
- uint64_t avail_space;
- uint32_t log;
- uint32_t chunks;
+ double avail_percent;
+ double avail_inodes;
+ uint64_t avail_space;
+ uint32_t log;
+ uint32_t chunks;
+ uint32_t total_blocks;
+ uint32_t avail_blocks;
+ uint32_t frsize; /*fragment size*/
};
typedef struct dht_du dht_du_t;
enum gf_defrag_type {
- GF_DEFRAG_CMD_START = 1,
- GF_DEFRAG_CMD_STOP = 1 + 1,
- GF_DEFRAG_CMD_STATUS = 1 + 2,
- GF_DEFRAG_CMD_START_LAYOUT_FIX = 1 + 3,
- GF_DEFRAG_CMD_START_FORCE = 1 + 4,
- GF_DEFRAG_CMD_START_TIER = 1 + 5,
- GF_DEFRAG_CMD_STATUS_TIER = 1 + 6,
- GF_DEFRAG_CMD_START_DETACH_TIER = 1 + 7,
- GF_DEFRAG_CMD_STOP_DETACH_TIER = 1 + 8,
-
+ GF_DEFRAG_CMD_NONE = 0,
+ GF_DEFRAG_CMD_START = 1,
+ GF_DEFRAG_CMD_STOP = 1 + 1,
+ GF_DEFRAG_CMD_STATUS = 1 + 2,
+ GF_DEFRAG_CMD_START_LAYOUT_FIX = 1 + 3,
+ GF_DEFRAG_CMD_START_FORCE = 1 + 4,
+ GF_DEFRAG_CMD_DETACH_STATUS = 1 + 11,
+ GF_DEFRAG_CMD_DETACH_START = 1 + 13,
+ GF_DEFRAG_CMD_DETACH_COMMIT = 1 + 14,
+ GF_DEFRAG_CMD_DETACH_COMMIT_FORCE = 1 + 15,
+ GF_DEFRAG_CMD_DETACH_STOP = 1 + 16,
+ /* new labels are used so it will help
+ * while removing old labels by easily differentiating.
+ * A few labels are added so that the count remains same
+ * between this enum and the ones on the xdr file.
+ * different values for the same enum cause errors and
+ * confusion.
+ */
};
typedef enum gf_defrag_type gf_defrag_type;
enum gf_defrag_status_t {
- GF_DEFRAG_STATUS_NOT_STARTED,
- GF_DEFRAG_STATUS_STARTED,
- GF_DEFRAG_STATUS_STOPPED,
- GF_DEFRAG_STATUS_COMPLETE,
- GF_DEFRAG_STATUS_FAILED,
- GF_DEFRAG_STATUS_LAYOUT_FIX_STARTED,
- GF_DEFRAG_STATUS_LAYOUT_FIX_STOPPED,
- GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE,
- GF_DEFRAG_STATUS_LAYOUT_FIX_FAILED,
+ GF_DEFRAG_STATUS_NOT_STARTED,
+ GF_DEFRAG_STATUS_STARTED,
+ GF_DEFRAG_STATUS_STOPPED,
+ GF_DEFRAG_STATUS_COMPLETE,
+ GF_DEFRAG_STATUS_FAILED,
+ GF_DEFRAG_STATUS_LAYOUT_FIX_STARTED,
+ GF_DEFRAG_STATUS_LAYOUT_FIX_STOPPED,
+ GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE,
+ GF_DEFRAG_STATUS_LAYOUT_FIX_FAILED,
};
typedef enum gf_defrag_status_t gf_defrag_status_t;
typedef struct gf_defrag_pattern_list gf_defrag_pattern_list_t;
struct gf_defrag_pattern_list {
- char path_pattern[256];
- uint64_t size;
- gf_defrag_pattern_list_t *next;
+ char path_pattern[256];
+ uint64_t size;
+ gf_defrag_pattern_list_t *next;
+};
+
+struct dht_container {
+ union {
+ struct list_head list;
+ struct {
+ struct _gf_dirent_t *next;
+ struct _gf_dirent_t *prev;
+ };
+ };
+ gf_dirent_t *df_entry;
+ xlator_t *this;
+ loc_t *parent_loc;
+ dict_t *migrate_data;
+ int local_subvol_index;
};
+typedef struct nodeuuid_info {
+ char info; /* Set to 1 is this is my node's uuid*/
+ uuid_t uuid; /* Store the nodeuuid as well for debugging*/
+} nodeuuid_info_t;
+
+typedef struct subvol_nodeuuids_info {
+ nodeuuid_info_t *elements;
+ int count;
+} subvol_nodeuuids_info_t;
+
struct gf_defrag_info_ {
- uint64_t total_files;
- uint64_t total_data;
- uint64_t num_files_lookedup;
- uint64_t total_failures;
- uint64_t skipped;
- gf_lock_t lock;
- int cmd;
- pthread_t th;
- gf_defrag_status_t defrag_status;
- struct rpc_clnt *rpc;
- uint32_t connected;
- uint32_t is_exiting;
- pid_t pid;
- inode_t *root_inode;
- uuid_t node_uuid;
- struct timeval start_time;
- gf_boolean_t stats;
- gf_defrag_pattern_list_t *defrag_pattern;
- int tier_promote_frequency;
- int tier_demote_frequency;
-
- /*Data Tiering params for scanner*/
- uint64_t total_files_promoted;
- uint64_t total_files_demoted;
- int write_freq_threshold;
- int read_freq_threshold;
+ uint64_t total_files;
+ uint64_t total_data;
+ uint64_t num_files_lookedup;
+ uint64_t total_failures;
+ uint64_t skipped;
+ uint64_t num_dirs_processed;
+ uint64_t size_processed;
+ gf_lock_t lock;
+ pthread_t th;
+ struct rpc_clnt *rpc;
+ uint32_t connected;
+ uint32_t is_exiting;
+ pid_t pid;
+ int cmd;
+ inode_t *root_inode;
+ uuid_t node_uuid;
+ time_t start_time;
+ uint32_t new_commit_hash;
+ gf_defrag_status_t defrag_status;
+ gf_defrag_pattern_list_t *defrag_pattern;
+
+ pthread_cond_t parallel_migration_cond;
+ pthread_mutex_t dfq_mutex;
+ pthread_cond_t rebalance_crawler_alarm;
+ int32_t q_entry_count;
+ int32_t global_error;
+ struct dht_container *queue;
+ int32_t crawl_done;
+ int32_t abort;
+ int32_t wakeup_crawler;
+
+ /*Throttle params*/
+ /*stands for reconfigured thread count*/
+ int32_t recon_thread_count;
+ pthread_cond_t df_wakeup_thread;
+
+ /* backpointer to make it easier to write functions for rebalance */
+ xlator_t *this;
+
+ pthread_cond_t fc_wakeup_cond;
+ pthread_mutex_t fc_mutex;
+
+ /*stands for current running thread count*/
+ int32_t current_thread_count;
+
+ gf_boolean_t stats;
+ /* lock migration flag */
+ gf_boolean_t lock_migration_enabled;
};
typedef struct gf_defrag_info_ gf_defrag_info_t;
struct dht_methods_s {
- int32_t (*migration_get_dst_subvol)(xlator_t *this,
- dht_local_t *local);
- int32_t (*migration_other)(xlator_t *this,
- gf_defrag_info_t *defrag);
- int32_t (*migration_needed)(xlator_t *this);
- xlator_t* (*layout_search)(xlator_t *this,
- dht_layout_t *layout,
- const char *name);
+ int32_t (*migration_get_dst_subvol)(xlator_t *this, dht_local_t *local);
+ int32_t (*migration_other)(xlator_t *this, gf_defrag_info_t *defrag);
+ xlator_t *(*layout_search)(xlator_t *this, dht_layout_t *layout,
+ const char *name);
};
typedef struct dht_methods_s dht_methods_t;
struct dht_conf {
- gf_lock_t subvolume_lock;
- int subvolume_cnt;
- xlator_t **subvolumes;
- char *subvolume_status;
- int *last_event;
- dht_layout_t **file_layouts;
- dht_layout_t **dir_layouts;
- gf_boolean_t search_unhashed;
- int gen;
- dht_du_t *du_stats;
- double min_free_disk;
- double min_free_inodes;
- char disk_unit;
- int32_t refresh_interval;
- gf_boolean_t unhashed_sticky_bit;
- struct timeval last_stat_fetch;
- gf_lock_t layout_lock;
- dict_t *leaf_to_subvol;
- void *private; /* Can be used by wrapper xlators over
- dht */
- gf_boolean_t use_readdirp;
- char vol_uuid[UUID_SIZE + 1];
- gf_boolean_t assert_no_child_down;
- time_t *subvol_up_time;
-
- /* This is the count used as the distribute layout for a directory */
- /* Will be a global flag to control the layout spread count */
- uint32_t dir_spread_cnt;
-
- /* to keep track of nodes which are decommissioned */
- xlator_t **decommissioned_bricks;
- int decommission_in_progress;
- int decommission_subvols_cnt;
-
- /* defrag related */
- gf_defrag_info_t *defrag;
-
- /* Request to filter directory entries in readdir request */
-
- gf_boolean_t readdir_optimize;
-
- /* Support regex-based name reinterpretation. */
- regex_t rsync_regex;
- gf_boolean_t rsync_regex_valid;
- regex_t extra_regex;
- gf_boolean_t extra_regex_valid;
-
- /* Support variable xattr names. */
- char *xattr_name;
- char *link_xattr_name;
- char *wild_xattr_name;
-
- /* Support size-weighted rebalancing (heterogeneous bricks). */
- gf_boolean_t do_weighting;
- gf_boolean_t randomize_by_gfid;
-
- dht_methods_t *methods;
-
- struct mem_pool *lock_pool;
+ xlator_t **subvolumes;
+ char *subvolume_status;
+ int *last_event;
+ dht_layout_t **file_layouts;
+ dht_layout_t **dir_layouts;
+ unsigned int search_unhashed;
+ int gen;
+ dht_du_t *du_stats;
+ double min_free_disk;
+ double min_free_inodes;
+ int subvolume_cnt;
+ int32_t refresh_interval;
+ gf_lock_t subvolume_lock;
+ time_t last_stat_fetch;
+ gf_lock_t layout_lock;
+ dict_t *leaf_to_subvol;
+ void *private; /* Can be used by wrapper xlators over
+ dht */
+ time_t *subvol_up_time;
+
+ /* to keep track of nodes which are decommissioned */
+ xlator_t **decommissioned_bricks;
+ int decommission_in_progress;
+ int decommission_subvols_cnt;
+
+ /* defrag related */
+ gf_defrag_info_t *defrag;
+
+ /* Support regex-based name reinterpretation. */
+ regex_t rsync_regex;
+ regex_t extra_regex;
+
+ /* Support variable xattr names. */
+ char *xattr_name;
+ char *mds_xattr_key;
+ char *link_xattr_name;
+ char *commithash_xattr_name;
+ char *wild_xattr_name;
+
+ dht_methods_t methods;
+
+ struct mem_pool *lock_pool;
+
+ /*local subvol storage for rebalance*/
+ xlator_t **local_subvols;
+ subvol_nodeuuids_info_t *local_nodeuuids;
+ int32_t local_subvols_cnt;
+
+ int dthrottle;
+
+ /* Hard link handle requirement for migration triggered from client*/
+ synclock_t link_lock;
+
+ /* lock migration */
+ gf_lock_t lock;
+
+ /* This is the count used as the distribute layout for a directory */
+ /* Will be a global flag to control the layout spread count */
+ uint32_t dir_spread_cnt;
+
+ /*
+ * "Commit hash" for this volume topology. Changed whenever bricks
+ * are added or removed.
+ */
+ uint32_t vol_commit_hash;
+
+ char vol_uuid[UUID_SIZE + 1];
+
+ char disk_unit;
+
+ gf_boolean_t lock_migration_enabled;
+
+ gf_boolean_t vch_forced;
+
+ gf_boolean_t use_fallocate;
+
+ gf_boolean_t force_migration;
+
+ gf_boolean_t lookup_optimize;
+
+ gf_boolean_t unhashed_sticky_bit;
+
+ gf_boolean_t assert_no_child_down;
+
+ gf_boolean_t use_readdirp;
+
+ /* Request to filter directory entries in readdir request */
+ gf_boolean_t readdir_optimize;
+
+ gf_boolean_t rsync_regex_valid;
+
+ gf_boolean_t extra_regex_valid;
+
+ /* Support size-weighted rebalancing (heterogeneous bricks). */
+ gf_boolean_t do_weighting;
+
+ gf_boolean_t randomize_by_gfid;
};
typedef struct dht_conf dht_conf_t;
+struct dht_dfoffset_ctx {
+ xlator_t *this;
+ off_t offset;
+ int32_t readdir_done;
+};
+typedef struct dht_dfoffset_ctx dht_dfoffset_ctx_t;
struct dht_disk_layout {
- uint32_t cnt;
- uint32_t type;
- struct {
- uint32_t start;
- uint32_t stop;
- } list[1];
+ uint32_t cnt;
+ uint32_t type;
+ struct {
+ uint32_t start;
+ uint32_t stop;
+ } list[1];
};
typedef struct dht_disk_layout dht_disk_layout_t;
typedef enum {
- GF_DHT_MIGRATE_DATA,
- GF_DHT_MIGRATE_DATA_EVEN_IF_LINK_EXISTS,
- GF_DHT_MIGRATE_HARDLINK,
- GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS
+ GF_DHT_MIGRATE_DATA,
+ GF_DHT_MIGRATE_DATA_EVEN_IF_LINK_EXISTS,
+ GF_DHT_MIGRATE_HARDLINK,
+ GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS
} gf_dht_migrate_data_type_t;
typedef enum {
- GF_DHT_EQUAL_DISTRIBUTION,
- GF_DHT_WEIGHTED_DISTRIBUTION
+ GF_DHT_EQUAL_DISTRIBUTION,
+ GF_DHT_WEIGHTED_DISTRIBUTION
} dht_distribution_type_t;
+struct dir_dfmeta {
+ gf_dirent_t *equeue;
+ dht_dfoffset_ctx_t *offset_var;
+ struct list_head **head;
+ struct list_head **iterator;
+ int *fetch_entries;
+ /* fds corresponding to local subvols only */
+ fd_t **lfd;
+};
+
+typedef struct dht_migrate_info {
+ xlator_t *src_subvol;
+ xlator_t *dst_subvol;
+ GF_REF_DECL;
+} dht_migrate_info_t;
+
+typedef struct dht_fd_ctx {
+ uint64_t opened_on_dst;
+ GF_REF_DECL;
+} dht_fd_ctx_t;
+
#define ENTRY_MISSING(op_ret, op_errno) (op_ret == -1 && op_errno == ENOENT)
-#define is_revalidate(loc) (dht_inode_ctx_layout_get (loc->inode, this, NULL) == 0)
+#define is_revalidate(loc) \
+ (dht_inode_ctx_layout_get((loc)->inode, this, NULL) == 0)
#define is_last_call(cnt) (cnt == 0)
#define DHT_MIGRATION_IN_PROGRESS 1
-#define DHT_MIGRATION_COMPLETED 2
+#define DHT_MIGRATION_COMPLETED 2
-#define check_is_linkfile(i,s,x,n) (IS_DHT_LINKFILE_MODE (s) && dict_get (x, n))
+#define check_is_linkfile(i, s, x, n) \
+ (IS_DHT_LINKFILE_MODE(s) && dict_get(x, n))
-#define IS_DHT_MIGRATION_PHASE2(buf) ( \
- IA_ISREG ((buf)->ia_type) && \
- ((st_mode_from_ia ((buf)->ia_prot, (buf)->ia_type) & \
- ~S_IFMT) == DHT_LINKFILE_MODE))
+#define IS_DHT_MIGRATION_PHASE2(buf) \
+ (IA_ISREG((buf)->ia_type) && \
+ ((st_mode_from_ia((buf)->ia_prot, (buf)->ia_type) & ~S_IFMT) == \
+ DHT_LINKFILE_MODE))
-#define IS_DHT_MIGRATION_PHASE1(buf) ( \
- IA_ISREG ((buf)->ia_type) && \
- ((buf)->ia_prot.sticky == 1) && \
- ((buf)->ia_prot.sgid == 1))
+#define IS_DHT_MIGRATION_PHASE1(buf) \
+ (IA_ISREG((buf)->ia_type) && ((buf)->ia_prot.sticky == 1) && \
+ ((buf)->ia_prot.sgid == 1))
-#define DHT_STRIP_PHASE1_FLAGS(buf) do { \
- if ((buf) && IS_DHT_MIGRATION_PHASE1(buf)) { \
- (buf)->ia_prot.sticky = 0; \
- (buf)->ia_prot.sgid = 0; \
- } \
- } while (0)
+#define DHT_STRIP_PHASE1_FLAGS(buf) \
+ do { \
+ if ((buf) && IS_DHT_MIGRATION_PHASE1(buf)) { \
+ (buf)->ia_prot.sticky = 0; \
+ (buf)->ia_prot.sgid = 0; \
+ } \
+ } while (0)
#define dht_inode_missing(op_errno) (op_errno == ENOENT || op_errno == ESTALE)
-#define check_is_dir(i,s,x) (IA_ISDIR(s->ia_type))
+#define check_is_dir(i, s, x) (IA_ISDIR(s->ia_type))
#define layout_is_sane(layout) ((layout) && (layout->cnt > 0))
-#define DHT_STACK_UNWIND(fop, frame, params ...) do { \
- dht_local_t *__local = NULL; \
- xlator_t *__xl = NULL; \
- if (frame) { \
- __xl = frame->this; \
- __local = frame->local; \
- frame->local = NULL; \
- } \
- STACK_UNWIND_STRICT (fop, frame, params); \
- dht_local_wipe (__xl, __local); \
- } while (0)
-
-#define DHT_STACK_DESTROY(frame) do { \
- dht_local_t *__local = NULL; \
- xlator_t *__xl = NULL; \
- __xl = frame->this; \
- __local = frame->local; \
- frame->local = NULL; \
- STACK_DESTROY (frame->root); \
- dht_local_wipe (__xl, __local); \
- } while (0)
-
-#define DHT_UPDATE_TIME(ctx_sec, ctx_nsec, new_sec, new_nsec, inode, post) do {\
- LOCK (&inode->lock); \
- { \
- if (ctx_sec == new_sec) \
- new_nsec = max (new_nsec, ctx_nsec); \
- else if (ctx_sec > new_sec) { \
- new_sec = ctx_sec; \
- new_nsec = ctx_nsec; \
- } \
- if (post) { \
- ctx_sec = new_sec; \
- ctx_nsec = new_nsec; \
- } \
- } \
- UNLOCK (&inode->lock); \
- } while (0)
-
-#define is_greater_time(a, an, b, bn) (((a) < (b)) || (((a) == (b)) && ((an) < (bn))))
-dht_layout_t *dht_layout_new (xlator_t *this, int cnt);
-dht_layout_t *dht_layout_get (xlator_t *this, inode_t *inode);
-dht_layout_t *dht_layout_for_subvol (xlator_t *this, xlator_t *subvol);
-xlator_t *dht_layout_search (xlator_t *this, dht_layout_t *layout,
- const char *name);
-int32_t
-dht_migration_get_dst_subvol(xlator_t *this, dht_local_t *local);
+#define we_are_not_migrating(x) ((x) == 1)
+
+#define DHT_STACK_UNWIND(fop, frame, params...) \
+ do { \
+ dht_local_t *__local = NULL; \
+ xlator_t *__xl = NULL; \
+ if (frame) { \
+ __xl = frame->this; \
+ __local = frame->local; \
+ frame->local = NULL; \
+ } \
+ STACK_UNWIND_STRICT(fop, frame, params); \
+ dht_local_wipe(__xl, __local); \
+ } while (0)
+
+#define DHT_STACK_DESTROY(frame) \
+ do { \
+ dht_local_t *__local = NULL; \
+ xlator_t *__xl = NULL; \
+ __xl = frame->this; \
+ __local = frame->local; \
+ frame->local = NULL; \
+ STACK_DESTROY(frame->root); \
+ dht_local_wipe(__xl, __local); \
+ } while (0)
+
+#define DHT_UPDATE_TIME(ctx_sec, ctx_nsec, new_sec, new_nsec, post) \
+ do { \
+ if (ctx_sec == new_sec) \
+ new_nsec = max(new_nsec, ctx_nsec); \
+ else if (ctx_sec > new_sec) { \
+ new_sec = ctx_sec; \
+ new_nsec = ctx_nsec; \
+ } \
+ if (post) { \
+ ctx_sec = new_sec; \
+ ctx_nsec = new_nsec; \
+ } \
+ } while (0)
+
+#define is_greater_time(a, an, b, bn) \
+ (((a) < (b)) || (((a) == (b)) && ((an) < (bn))))
+
+#define DHT_MARK_FOP_INTERNAL(xattr) \
+ do { \
+ int tmp = -1; \
+ if (!xattr) { \
+ xattr = dict_new(); \
+ if (!xattr) \
+ break; \
+ } \
+ tmp = dict_set_str(xattr, GLUSTERFS_INTERNAL_FOP_KEY, "yes"); \
+ if (tmp) { \
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, \
+ "Failed to set dictionary value: key = %s," \
+ " path = %s", \
+ GLUSTERFS_INTERNAL_FOP_KEY, local->loc.path); \
+ } \
+ } while (0)
+
+dht_layout_t *
+dht_layout_new(xlator_t *this, int cnt);
+dht_layout_t *
+dht_layout_get(xlator_t *this, inode_t *inode);
+dht_layout_t *
+dht_layout_for_subvol(xlator_t *this, xlator_t *subvol);
+xlator_t *
+dht_layout_search(xlator_t *this, dht_layout_t *layout, const char *name);
+int32_t
+dht_migration_get_dst_subvol(xlator_t *this, dht_local_t *local);
int32_t
dht_migration_needed(xlator_t *this);
-int dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout);
-int dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout,
- uint32_t *holes_p, uint32_t *overlaps_p,
- uint32_t *missing_p, uint32_t *down_p,
- uint32_t *misc_p, uint32_t *no_space_p);
-int dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout,
- xlator_t *subvol, loc_t *loc, dict_t *xattr);
+int
+dht_layout_normalize(xlator_t *this, loc_t *loc, dht_layout_t *layout);
+void
+dht_layout_anomalies(xlator_t *this, loc_t *loc, dht_layout_t *layout,
+ uint32_t *holes_p, uint32_t *overlaps_p,
+ uint32_t *missing_p, uint32_t *down_p, uint32_t *misc_p,
+ uint32_t *no_space_p);
+int
+dht_layout_dir_mismatch(xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
+ loc_t *loc, dict_t *xattr);
+xlator_t *
+dht_linkfile_subvol(xlator_t *this, inode_t *inode, struct iatt *buf,
+ dict_t *xattr);
+int
+dht_linkfile_unlink(call_frame_t *frame, xlator_t *this, xlator_t *subvol,
+ loc_t *loc);
+
+int
+dht_layouts_init(xlator_t *this, dht_conf_t *conf);
+int
+dht_layout_merge(xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
+ int op_ret, int op_errno, dict_t *xattr);
+
+int
+dht_disk_layout_extract(xlator_t *this, dht_layout_t *layout, int pos,
+ int32_t **disk_layout_p);
+int
+dht_disk_layout_extract_for_subvol(xlator_t *this, dht_layout_t *layout,
+ xlator_t *subvol, int32_t **disk_layout_p);
+
+int
+dht_frame_return(call_frame_t *frame);
+
+int
+dht_deitransform(xlator_t *this, uint64_t y, xlator_t **subvol);
+
+void
+dht_local_wipe(xlator_t *this, dht_local_t *local);
+dht_local_t *
+dht_local_init(call_frame_t *frame, loc_t *loc, fd_t *fd, glusterfs_fop_t fop);
+int
+dht_iatt_merge(xlator_t *this, struct iatt *to, struct iatt *from);
+
+xlator_t *
+dht_subvol_get_hashed(xlator_t *this, loc_t *loc);
+xlator_t *
+dht_subvol_get_cached(xlator_t *this, inode_t *inode);
+xlator_t *
+dht_subvol_next(xlator_t *this, xlator_t *prev);
+xlator_t *
+dht_subvol_next_available(xlator_t *this, xlator_t *prev);
+int
+dht_subvol_cnt(xlator_t *this, xlator_t *subvol);
-xlator_t *dht_linkfile_subvol (xlator_t *this, inode_t *inode,
- struct iatt *buf, dict_t *xattr);
-int dht_linkfile_unlink (call_frame_t *frame, xlator_t *this,
- xlator_t *subvol, loc_t *loc);
+int
+dht_hash_compute(xlator_t *this, int type, const char *name, uint32_t *hash_p);
-int dht_layouts_init (xlator_t *this, dht_conf_t *conf);
-int dht_layout_merge (xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
- int op_ret, int op_errno, dict_t *xattr);
+int
+dht_linkfile_create(call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk,
+ xlator_t *this, xlator_t *tovol, xlator_t *fromvol,
+ loc_t *loc);
+int
+dht_lookup_everywhere(call_frame_t *frame, xlator_t *this, loc_t *loc);
+int
+dht_selfheal_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,
+ loc_t *loc, dht_layout_t *layout);
+int
+dht_selfheal_new_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,
+ dht_layout_t *layout);
+int
+dht_selfheal_restore(call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,
+ loc_t *loc, dht_layout_t *layout);
+void
+dht_layout_sort_volname(dht_layout_t *layout);
-int dht_disk_layout_extract (xlator_t *this, dht_layout_t *layout,
- int pos, int32_t **disk_layout_p);
-int dht_disk_layout_merge (xlator_t *this, dht_layout_t *layout,
- int pos, void *disk_layout_raw, int disk_layout_len);
+int
+dht_get_du_info(call_frame_t *frame, xlator_t *this, loc_t *loc);
+gf_boolean_t
+dht_is_subvol_filled(xlator_t *this, xlator_t *subvol);
+xlator_t *
+dht_free_disk_available_subvol(xlator_t *this, xlator_t *subvol,
+ dht_local_t *layout);
+int
+dht_get_du_info_for_subvol(xlator_t *this, int subvol_idx);
-int dht_frame_return (call_frame_t *frame);
+int
+dht_layout_preset(xlator_t *this, xlator_t *subvol, inode_t *inode);
+int
+dht_layout_set(xlator_t *this, inode_t *inode, dht_layout_t *layout);
+;
+void
+dht_layout_unref(xlator_t *this, dht_layout_t *layout);
+dht_layout_t *
+dht_layout_ref(xlator_t *this, dht_layout_t *layout);
+int
+dht_layout_index_for_subvol(dht_layout_t *layout, xlator_t *subvol);
+xlator_t *
+dht_first_up_subvol(xlator_t *this);
+xlator_t *
+dht_last_up_subvol(xlator_t *this);
-int dht_deitransform (xlator_t *this, uint64_t y, xlator_t **subvol);
+int
+dht_build_child_loc(xlator_t *this, loc_t *child, loc_t *parent, char *name);
-void dht_local_wipe (xlator_t *this, dht_local_t *local);
-dht_local_t *dht_local_init (call_frame_t *frame, loc_t *loc, fd_t *fd,
- glusterfs_fop_t fop);
-int dht_iatt_merge (xlator_t *this, struct iatt *to, struct iatt *from,
- xlator_t *subvol);
+int
+dht_filter_loc_subvol_key(xlator_t *this, loc_t *loc, loc_t *new_loc,
+ xlator_t **subvol);
-xlator_t *dht_subvol_get_hashed (xlator_t *this, loc_t *loc);
-xlator_t *dht_subvol_get_cached (xlator_t *this, inode_t *inode);
-xlator_t *dht_subvol_next (xlator_t *this, xlator_t *prev);
-xlator_t *dht_subvol_next_available (xlator_t *this, xlator_t *prev);
-int dht_subvol_cnt (xlator_t *this, xlator_t *subvol);
+int
+dht_rename_cleanup(call_frame_t *frame);
+int
+dht_rename_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
-int dht_hash_compute (xlator_t *this, int type, const char *name, uint32_t *hash_p);
+int
+dht_update_commit_hash_for_layout(call_frame_t *frame);
+int
+dht_fix_directory_layout(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
+ dht_layout_t *layout);
-int dht_linkfile_create (call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk,
- xlator_t *this, xlator_t *tovol,
- xlator_t *fromvol, loc_t *loc);
-int dht_lookup_directory (call_frame_t *frame, xlator_t *this, loc_t *loc);
-int dht_lookup_everywhere (call_frame_t *frame, xlator_t *this, loc_t *loc);
int
-dht_selfheal_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,
- loc_t *loc, dht_layout_t *layout);
+dht_init_subvolumes(xlator_t *this, dht_conf_t *conf);
+/* migration/rebalance */
int
-dht_selfheal_directory_for_nameless_lookup (call_frame_t *frame,
- dht_selfheal_dir_cbk_t cbk,
- loc_t *loc, dht_layout_t *layout);
+dht_start_rebalance_task(xlator_t *this, call_frame_t *frame);
int
-dht_selfheal_new_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,
- dht_layout_t *layout);
+dht_rebalance_in_progress_check(xlator_t *this, call_frame_t *frame);
int
-dht_selfheal_restore (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,
- loc_t *loc, dht_layout_t *layout);
+dht_rebalance_complete_check(xlator_t *this, call_frame_t *frame);
+
int
-dht_layout_sort_volname (dht_layout_t *layout);
+dht_init_local_subvolumes(xlator_t *this, dht_conf_t *conf);
+
+/* FOPS */
+int32_t
+dht_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req);
+
+int32_t
+dht_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata);
+
+int32_t
+dht_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata);
+
+int32_t
+dht_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata);
-int dht_get_du_info (call_frame_t *frame, xlator_t *this, loc_t *loc);
+int32_t
+dht_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata);
-gf_boolean_t dht_is_subvol_filled (xlator_t *this, xlator_t *subvol);
-xlator_t *dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol,
- dht_local_t *layout);
-int dht_get_du_info_for_subvol (xlator_t *this, int subvol_idx);
+int32_t
+dht_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
+ dict_t *xdata);
-int dht_layout_preset (xlator_t *this, xlator_t *subvol, inode_t *inode);
-int dht_layout_set (xlator_t *this, inode_t *inode, dht_layout_t *layout);;
-void dht_layout_unref (xlator_t *this, dht_layout_t *layout);
-dht_layout_t *dht_layout_ref (xlator_t *this, dht_layout_t *layout);
-xlator_t *dht_first_up_subvol (xlator_t *this);
-xlator_t *dht_last_up_subvol (xlator_t *this);
+int32_t
+dht_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+ dict_t *xdata);
-int dht_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name);
+int32_t
+dht_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *xdata);
-int dht_filter_loc_subvol_key (xlator_t *this, loc_t *loc, loc_t *new_loc,
- xlator_t **subvol);
+int32_t
+dht_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata);
-int dht_rename_cleanup (call_frame_t *frame);
-int dht_rename_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata);
+int32_t
+dht_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata);
-int dht_fix_directory_layout (call_frame_t *frame,
- dht_selfheal_dir_cbk_t dir_cbk,
- dht_layout_t *layout);
+int32_t
+dht_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata);
-int dht_init_subvolumes (xlator_t *this, dht_conf_t *conf);
+int32_t
+dht_symlink(call_frame_t *frame, xlator_t *this, const char *linkpath,
+ loc_t *loc, mode_t umask, dict_t *xdata);
-/* migration/rebalance */
-int dht_start_rebalance_task (xlator_t *this, call_frame_t *frame);
+int32_t
+dht_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata);
-int dht_rebalance_in_progress_check (xlator_t *this, call_frame_t *frame);
-int dht_rebalance_complete_check (xlator_t *this, call_frame_t *frame);
+int32_t
+dht_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata);
+int32_t
+dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *params);
-/* FOPS */
-int32_t dht_lookup (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *xattr_req);
-
-int32_t dht_stat (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc, dict_t *xdata);
-
-int32_t dht_fstat (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd, dict_t *xdata);
-
-int32_t dht_truncate (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- off_t offset, dict_t *xdata);
-
-int32_t dht_ftruncate (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- off_t offset, dict_t *xdata);
-
-int32_t dht_access (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t mask, dict_t *xdata);
-
-int32_t dht_readlink (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- size_t size, dict_t *xdata);
-
-int32_t dht_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc,
- mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata);
-
-int32_t dht_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata);
-
-int32_t dht_unlink (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc, int xflag, dict_t *xdata);
-
-int32_t dht_rmdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int flags, dict_t *xdata);
-
-int32_t dht_symlink (call_frame_t *frame, xlator_t *this,
- const char *linkpath, loc_t *loc, mode_t umask,
- dict_t *xdata);
-
-int32_t dht_rename (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc, dict_t *xdata);
-
-int32_t dht_link (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc, dict_t *xdata);
-
-int32_t dht_create (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, mode_t mode,
- mode_t umask, fd_t *fd, dict_t *params);
-
-int32_t dht_open (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flags, fd_t *fd, dict_t *xdata);
-
-int32_t dht_readv (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset, uint32_t flags, dict_t *xdata);
-
-int32_t dht_writev (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- struct iovec *vector,
- int32_t count,
- off_t offset,
- uint32_t flags,
- struct iobref *iobref, dict_t *xdata);
-
-int32_t dht_flush (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd, dict_t *xdata);
-
-int32_t dht_fsync (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t datasync, dict_t *xdata);
-
-int32_t dht_opendir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc, fd_t *fd, dict_t *xdata);
-
-int32_t dht_fsyncdir (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t datasync, dict_t *xdata);
-
-int32_t dht_statfs (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc, dict_t *xdata);
-
-int32_t dht_setxattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *dict,
- int32_t flags, dict_t *xdata);
-
-int32_t dht_getxattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name, dict_t *xdata);
-
-int32_t dht_fsetxattr (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- dict_t *dict,
- int32_t flags, dict_t *xdata);
-
-int32_t dht_fgetxattr (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- const char *name, dict_t *xdata);
-
-int32_t dht_removexattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name, dict_t *xdata);
-int32_t dht_fremovexattr (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- const char *name, dict_t *xdata);
-
-int32_t dht_lk (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t cmd,
- struct gf_flock *flock, dict_t *xdata);
-
-int32_t dht_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd,
- struct gf_flock *flock, dict_t *xdata);
-
-int32_t dht_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd,
- struct gf_flock *flock, dict_t *xdata);
-
-int32_t dht_entrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type, dict_t *xdata);
-
-int32_t dht_fentrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type, dict_t *xdata);
-
-int32_t dht_readdir (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size, off_t off, dict_t *xdata);
-
-int32_t dht_readdirp (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size, off_t off, dict_t *dict);
-
-int32_t dht_xattrop (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- gf_xattrop_flags_t flags,
- dict_t *dict, dict_t *xdata);
-
-int32_t dht_fxattrop (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- gf_xattrop_flags_t flags,
- dict_t *dict, dict_t *xdata);
-
-int32_t dht_forget (xlator_t *this, inode_t *inode);
-int32_t dht_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid, dict_t *xdata);
-int32_t dht_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iatt *stbuf, int32_t valid, dict_t *xdata);
-int32_t dht_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t mode, off_t offset, size_t len, dict_t *xdata);
-int32_t dht_discard(call_frame_t *frame, xlator_t *this, fd_t *fd,
- off_t offset, size_t len, dict_t *xdata);
-int32_t dht_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd,
- off_t offset, off_t len, dict_t *xdata);
+int32_t
+dht_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata);
+
+int32_t
+dht_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata);
+
+int32_t
+dht_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
+ int32_t count, off_t offset, uint32_t flags, struct iobref *iobref,
+ dict_t *xdata);
+
+int32_t
+dht_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata);
+
+int32_t
+dht_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+ dict_t *xdata);
+
+int32_t
+dht_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata);
+
+int32_t
+dht_fsyncdir(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+ dict_t *xdata);
+
+int32_t
+dht_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata);
+
+int32_t
+dht_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata);
+
+int32_t
+dht_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name,
+ dict_t *xdata);
+
+int32_t
+dht_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata);
+
+int32_t
+dht_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
+ dict_t *xdata);
+
+int32_t
+dht_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata);
+int32_t
+dht_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata);
+
+int32_t
+dht_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata);
+
+int32_t
+dht_lease(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct gf_lease *lease, dict_t *xdata);
+
+int32_t
+dht_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc,
+ int32_t cmd, struct gf_flock *flock, dict_t *xdata);
+
+int32_t
+dht_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+ int32_t cmd, struct gf_flock *flock, dict_t *xdata);
+
+int32_t
+dht_entrylk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc,
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata);
+
+int32_t
+dht_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata);
+
+int32_t
+dht_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *xdata);
+
+int32_t
+dht_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *dict);
+
+int32_t
+dht_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata);
+
+int32_t
+dht_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata);
+
+int32_t
+dht_forget(xlator_t *this, inode_t *inode);
+int32_t
+dht_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata);
+int32_t
+dht_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata);
+int32_t
+dht_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata);
+int32_t
+dht_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata);
+int32_t
+dht_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata);
+int32_t
+dht_ipc(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata);
int
dht_set_subvol_range(xlator_t *this);
-int32_t dht_init (xlator_t *this);
-void dht_fini (xlator_t *this);
-int dht_reconfigure (xlator_t *this, dict_t *options);
-int32_t dht_notify (xlator_t *this, int32_t event, void *data, ...);
+int32_t
+dht_init(xlator_t *this);
+void
+dht_fini(xlator_t *this);
+int
+dht_reconfigure(xlator_t *this, dict_t *options);
+int32_t
+dht_notify(xlator_t *this, int32_t event, void *data, ...);
/* definitions for nufa/switch */
-int dht_revalidate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, inode_t *inode,
- struct iatt *stbuf, dict_t *xattr,
- struct iatt *postparent);
-int dht_lookup_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+int
+dht_revalidate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf,
+ dict_t *xattr, struct iatt *postparent);
+int
+dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf,
+ dict_t *xattr, struct iatt *postparent);
+int
+dht_lookup_linkfile_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, inode_t *inode,
struct iatt *stbuf, dict_t *xattr,
struct iatt *postparent);
-int dht_lookup_linkfile_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int op_ret, int op_errno,
- inode_t *inode, struct iatt *stbuf, dict_t *xattr,
- struct iatt *postparent);
-int dht_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- inode_t *inode, struct iatt *stbuf, dict_t *xattr,
- struct iatt *postparent);
-int dht_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- fd_t *fd, inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata);
-int dht_newfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- inode_t *inode, struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata);
+int
+dht_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, inode_t *inode, struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent);
+int
+dht_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, fd_t *fd, inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata);
+int
+dht_newfile_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata);
+
+int
+dht_finodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
int
-gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict);
+dht_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, dict_t *xattr, dict_t *xdata);
int
-gf_defrag_start_detach_tier (gf_defrag_info_t *defrag);
+dht_common_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata);
+int
+gf_defrag_status_get(dht_conf_t *conf, dict_t *dict);
int
-gf_defrag_stop (gf_defrag_info_t *defrag, gf_defrag_status_t status,
- dict_t *output);
+gf_defrag_stop(dht_conf_t *conf, gf_defrag_status_t status, dict_t *output);
-void*
-gf_defrag_start (void *this);
+void *
+gf_defrag_start(void *this);
int32_t
-gf_defrag_handle_hardlink (xlator_t *this, loc_t *loc, dict_t *xattrs,
- struct iatt *stbuf);
+gf_defrag_handle_hardlink(xlator_t *this, loc_t *loc, int *fop_errno);
int
-dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
- int flag);
+dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
+ int flag, int *fop_errno);
int
-dht_inode_ctx_layout_get (inode_t *inode, xlator_t *this,
- dht_layout_t **layout_int);
+dht_inode_ctx_layout_get(inode_t *inode, xlator_t *this,
+ dht_layout_t **layout_int);
int
-dht_inode_ctx_layout_set (inode_t *inode, xlator_t *this,
- dht_layout_t* layout_int);
+dht_inode_ctx_layout_set(inode_t *inode, xlator_t *this,
+ dht_layout_t *layout_int);
int
-dht_inode_ctx_time_update (inode_t *inode, xlator_t *this, struct iatt *stat,
- int32_t update_ctx);
-void dht_inode_ctx_time_set (inode_t *inode, xlator_t *this, struct iatt *stat);
+dht_inode_ctx_time_update(inode_t *inode, xlator_t *this, struct iatt *stat,
+ int32_t update_ctx);
+void
+dht_inode_ctx_time_set(inode_t *inode, xlator_t *this, struct iatt *stat);
-int dht_inode_ctx_get (inode_t *inode, xlator_t *this, dht_inode_ctx_t **ctx);
-int dht_inode_ctx_set (inode_t *inode, xlator_t *this, dht_inode_ctx_t *ctx);
int
-dht_dir_attr_heal (void *data);
+dht_inode_ctx_get(inode_t *inode, xlator_t *this, dht_inode_ctx_t **ctx);
int
-dht_dir_attr_heal_done (int ret, call_frame_t *sync_frame, void *data);
+dht_inode_ctx_set(inode_t *inode, xlator_t *this, dht_inode_ctx_t *ctx);
int
-dht_dir_has_layout (dict_t *xattr, char *name);
-gf_boolean_t
-dht_is_subvol_in_layout (dht_layout_t *layout, xlator_t *xlator);
+dht_dir_attr_heal(void *data);
+int
+dht_dir_attr_heal_done(int ret, call_frame_t *sync_frame, void *data);
xlator_t *
-dht_subvol_with_free_space_inodes (xlator_t *this, xlator_t *subvol,
- dht_layout_t *layout);
+dht_subvol_with_free_space_inodes(xlator_t *this, xlator_t *subvol,
+ xlator_t *ignore, dht_layout_t *layout,
+ uint64_t filesize);
xlator_t *
-dht_subvol_maxspace_nonzeroinode (xlator_t *this, xlator_t *subvol,
- dht_layout_t *layout);
+dht_subvol_maxspace_nonzeroinode(xlator_t *this, xlator_t *subvol,
+ dht_layout_t *layout);
int
-dht_linkfile_attr_heal (call_frame_t *frame, xlator_t *this);
+dht_dir_has_layout(dict_t *xattr, char *name);
+int
+dht_linkfile_attr_heal(call_frame_t *frame, xlator_t *this);
-void
-dht_layout_dump (dht_layout_t *layout, const char *prefix);
int32_t
-dht_priv_dump (xlator_t *this);
+dht_priv_dump(xlator_t *this);
int32_t
-dht_inodectx_dump (xlator_t *this, inode_t *inode);
+dht_inodectx_dump(xlator_t *this, inode_t *inode);
+
+gf_boolean_t
+dht_is_subvol_in_layout(dht_layout_t *layout, xlator_t *xlator);
int
-dht_inode_ctx_get1 (xlator_t *this, inode_t *inode, xlator_t **subvol);
+dht_inode_ctx_get_mig_info(xlator_t *this, inode_t *inode,
+ xlator_t **src_subvol, xlator_t **dst_subvol);
+gf_boolean_t
+dht_mig_info_is_invalid(xlator_t *current, xlator_t *src_subvol,
+ xlator_t *dst_subvol);
int
-dht_subvol_status (dht_conf_t *conf, xlator_t *subvol);
+dht_subvol_status(dht_conf_t *conf, xlator_t *subvol);
void
-dht_log_new_layout_for_dir_selfheal (xlator_t *this, loc_t *loc,
- dht_layout_t *layout);
+dht_log_new_layout_for_dir_selfheal(xlator_t *this, loc_t *loc,
+ dht_layout_t *layout);
+
int
-dht_lookup_everywhere_done (call_frame_t *frame, xlator_t *this);
+dht_layout_sort(dht_layout_t *layout);
int
-dht_fill_dict_to_avoid_unlink_of_migrating_file (dict_t *dict);
+dht_heal_full_path(void *data);
+int
+dht_heal_full_path_done(int op_ret, call_frame_t *frame, void *data);
-/* Acquire non-blocking inodelk on a list of xlators.
- *
- * @lk_array: array of lock requests lock on.
- *
- * @lk_count: number of locks in @lk_array
- *
- * @inodelk_cbk: will be called after inodelk replies are received
- *
- * @retval: -1 if stack_winding inodelk fails. 0 otherwise.
- * inodelk_cbk is called with appropriate error on errors.
- * On failure to acquire lock on all members of list, successful
- * locks are unlocked before invoking cbk.
- */
+int
+dht_layout_missing_dirs(dht_layout_t *layout);
int
-dht_nonblocking_inodelk (call_frame_t *frame, dht_lock_t **lk_array,
- int lk_count, fop_inodelk_cbk_t inodelk_cbk);
+dht_refresh_layout(call_frame_t *frame);
+
+int
+dht_build_parent_loc(xlator_t *this, loc_t *parent, loc_t *child,
+ int32_t *op_errno);
+
+int32_t
+dht_set_local_rebalance(xlator_t *this, dht_local_t *local, struct iatt *stbuf,
+ struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata);
+void
+dht_build_root_loc(inode_t *inode, loc_t *loc);
+
+gf_boolean_t
+dht_fd_open_on_dst(xlator_t *this, fd_t *fd, xlator_t *dst);
+
+int32_t
+dht_fd_ctx_destroy(xlator_t *this, fd_t *fd);
+
+int32_t
+dht_release(xlator_t *this, fd_t *fd);
+
+int32_t
+dht_set_fixed_dir_stat(struct iatt *stat);
+
+xlator_t *
+dht_get_lock_subvolume(xlator_t *this, struct gf_flock *lock,
+ dht_local_t *local);
+
+int
+dht_lk_inode_unref(call_frame_t *frame, int32_t op_ret);
+
+int
+dht_fd_ctx_set(xlator_t *this, fd_t *fd, xlator_t *subvol);
+
+int
+dht_check_and_open_fd_on_subvol(xlator_t *this, call_frame_t *frame);
+
+/* FD fop callbacks */
+
+int
+dht_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata);
+
+int
+dht_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, dict_t *xdata);
+
+int
+dht_file_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
+
+int
+dht_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata);
+
+int
+dht_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata);
+
+int
+dht_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata);
+
+int
+dht_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata);
-/* same as dht_nonblocking_inodelk, but issues sequential blocking locks on
- * @lk_array directly. locks are issued on some order which remains same
- * for a list of xlators (irrespective of order of xlators within list).
- */
int
-dht_blocking_inodelk (call_frame_t *frame, dht_lock_t **lk_array,
- int lk_count, fop_inodelk_cbk_t inodelk_cbk);
+dht_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata);
+
+int
+dht_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iovec *vector, int count, struct iatt *stbuf,
+ struct iobref *iobref, dict_t *xdata);
+
+int
+dht_file_attr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *stbuf, dict_t *xdata);
+
+int
+dht_file_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata);
+
+int
+dht_file_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata);
+
+/* All custom xattr heal functions */
+int
+dht_dir_heal_xattrs(void *data);
+
+int
+dht_dir_heal_xattrs_done(int ret, call_frame_t *sync_frame, void *data);
int32_t
-dht_unlock_inodelk (call_frame_t *frame, dht_lock_t **lk_array, int lk_count,
- fop_inodelk_cbk_t inodelk_cbk);
+dht_dict_set_array(dict_t *dict, char *key, int32_t value[], int32_t size);
+
+int
+dht_set_user_xattr(dict_t *dict, char *k, data_t *v, void *data);
+
+void
+dht_dir_set_heal_xattr(xlator_t *this, dht_local_t *local, dict_t *dst,
+ dict_t *src, int *uret, int *uflag);
+
+int
+dht_dir_xattr_heal(xlator_t *this, dht_local_t *local, int *op_errno);
+
+int
+dht_common_mark_mdsxattr(call_frame_t *frame, int *errst, int flag);
+
+int
+dht_inode_ctx_mdsvol_get(inode_t *inode, xlator_t *this, xlator_t **mdsvol);
+
+int
+dht_selfheal_dir_setattr(call_frame_t *frame, loc_t *loc, struct iatt *stbuf,
+ int32_t valid, dht_layout_t *layout);
+
+/* Abstract out the DHT-IATT-IN-DICT */
-dht_lock_t *
-dht_lock_new (xlator_t *this, xlator_t *xl, loc_t *loc, short type,
- const char *domain);
void
-dht_lock_array_free (dht_lock_t **lk_array, int count);
+dht_selfheal_layout_new_directory(call_frame_t *frame, loc_t *loc,
+ dht_layout_t *new_layout);
+
+int
+dht_pt_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *key,
+ dict_t *xdata);
+
+int
+dht_pt_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *key, dict_t *xdata);
int32_t
-dht_lock_count (dht_lock_t **lk_array, int lk_count);
+dht_pt_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata);
int
-dht_layout_sort (dht_layout_t *layout);
+dht_pt_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata);
+
+int32_t
+dht_check_remote_fd_failed_error(dht_local_t *local, int op_ret, int op_errno);
int
-dht_layout_missing_dirs (dht_layout_t *layout);
+dht_common_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata);
-#endif/* _DHT_H */
+int32_t
+dht_create_lock(call_frame_t *frame, xlator_t *subvol);
+
+int
+dht_set_parent_layout_in_dict(loc_t *loc, xlator_t *this, dht_local_t *local);
+
+int
+dht_dir_layout_error_check(xlator_t *this, inode_t *inode);
+
+int
+dht_inode_ctx_mdsvol_set(inode_t *inode, xlator_t *this, xlator_t *mds_subvol);
+#endif /* _DHT_H */
diff --git a/xlators/cluster/dht/src/dht-diskusage.c b/xlators/cluster/dht/src/dht-diskusage.c
index 621c613d08c..c0588828fdb 100644
--- a/xlators/cluster/dht/src/dht-diskusage.c
+++ b/xlators/cluster/dht/src/dht-diskusage.c
@@ -8,440 +8,480 @@
cases as published by the Free Software Foundation.
*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
/* TODO: add NS locking */
-#include "glusterfs.h"
-#include "xlator.h"
#include "dht-common.h"
-#include "dht-messages.h"
-#include "defaults.h"
#include <sys/time.h>
-
+#include <glusterfs/events.h>
int
-dht_du_info_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct statvfs *statvfs,
- dict_t *xdata)
+dht_du_info_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct statvfs *statvfs, dict_t *xdata)
{
- dht_conf_t *conf = NULL;
- call_frame_t *prev = NULL;
- int this_call_cnt = 0;
- int i = 0;
- double percent = 0;
- double percent_inodes = 0;
- uint64_t bytes = 0;
- uint32_t bpc; /* blocks per chunk */
- uint32_t chunks = 0;
-
- conf = this->private;
- prev = cookie;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "failed to get disk info from %s", prev->this->name);
- goto out;
- }
-
- if (statvfs && statvfs->f_blocks) {
- percent = (statvfs->f_bavail * 100) / statvfs->f_blocks;
- bytes = (statvfs->f_bavail * statvfs->f_frsize);
- /*
- * A 32-bit count of 1MB chunks allows a maximum brick size of
- * ~4PB. It's possible that we could see a single local FS
- * bigger than that some day, but this code is likely to be
- * irrelevant by then. Meanwhile, it's more important to keep
- * the chunk size small so the layout-calculation code that
- * uses this value can be tested on normal machines.
- */
- bpc = (1 << 20) / statvfs->f_bsize;
- chunks = (statvfs->f_blocks + bpc - 1) / bpc;
- }
-
- if (statvfs && statvfs->f_files) {
- percent_inodes = (statvfs->f_ffree * 100) / statvfs->f_files;
- } else {
- /*
- * Set percent inodes to 100 for dynamically allocated inode
- * filesystems. The rationale is that distribute need not
- * worry about total inodes; rather, let the 'create()' be
- * scheduled on the hashed subvol regardless of the total
- * inodes.
- */
- percent_inodes = 100;
- }
-
- LOCK (&conf->subvolume_lock);
- {
- for (i = 0; i < conf->subvolume_cnt; i++)
- if (prev->this == conf->subvolumes[i]) {
- conf->du_stats[i].avail_percent = percent;
- conf->du_stats[i].avail_space = bytes;
- conf->du_stats[i].avail_inodes = percent_inodes;
- conf->du_stats[i].chunks = chunks;
- gf_msg_debug (this->name, 0,
- "subvolume '%s': avail_percent "
- "is: %.2f and avail_space "
- "is: %" PRIu64" and avail_inodes"
- " is: %.2f",
- prev->this->name,
- conf->du_stats[i].avail_percent,
- conf->du_stats[i].avail_space,
- conf->du_stats[i].avail_inodes);
- break; /* no point in looping further */
- }
- }
- UNLOCK (&conf->subvolume_lock);
+ dht_conf_t *conf = NULL;
+ xlator_t *prev = NULL;
+ int this_call_cnt = 0;
+ int i = 0;
+ double percent = 0;
+ double percent_inodes = 0;
+ uint64_t bytes = 0;
+ uint32_t bpc; /* blocks per chunk */
+ uint32_t chunks = 0;
+
+ conf = this->private;
+ prev = cookie;
+
+ if (op_ret == -1 || !statvfs) {
+ gf_msg(this->name, GF_LOG_WARNING, op_errno,
+ DHT_MSG_GET_DISK_INFO_ERROR, "failed to get disk info from %s",
+ prev->name);
+ goto out;
+ }
+
+ if (statvfs->f_blocks) {
+ percent = (statvfs->f_bavail * 100) / statvfs->f_blocks;
+ bytes = (statvfs->f_bavail * statvfs->f_frsize);
+ /*
+ * A 32-bit count of 1MB chunks allows a maximum brick size of
+ * ~4PB. It's possible that we could see a single local FS
+ * bigger than that some day, but this code is likely to be
+ * irrelevant by then. Meanwhile, it's more important to keep
+ * the chunk size small so the layout-calculation code that
+ * uses this value can be tested on normal machines.
+ */
+ bpc = (1 << 20) / statvfs->f_bsize;
+ chunks = (statvfs->f_blocks + bpc - 1) / bpc;
+ }
+
+ if (statvfs->f_files) {
+ percent_inodes = (statvfs->f_ffree * 100) / statvfs->f_files;
+ } else {
+ /*
+ * Set percent inodes to 100 for dynamically allocated inode
+ * filesystems. The rationale is that distribute need not
+ * worry about total inodes; rather, let the 'create()' be
+ * scheduled on the hashed subvol regardless of the total
+ * inodes.
+ */
+ percent_inodes = 100;
+ }
+
+ LOCK(&conf->subvolume_lock);
+ {
+ for (i = 0; i < conf->subvolume_cnt; i++)
+ if (prev == conf->subvolumes[i]) {
+ conf->du_stats[i].avail_percent = percent;
+ conf->du_stats[i].avail_space = bytes;
+ conf->du_stats[i].avail_inodes = percent_inodes;
+ conf->du_stats[i].chunks = chunks;
+ conf->du_stats[i].total_blocks = statvfs->f_blocks;
+ conf->du_stats[i].avail_blocks = statvfs->f_bavail;
+ conf->du_stats[i].frsize = statvfs->f_frsize;
+
+ gf_msg_debug(this->name, 0,
+ "subvolume '%s': avail_percent "
+ "is: %.2f and avail_space "
+ "is: %" PRIu64
+ " and avail_inodes"
+ " is: %.2f",
+ prev->name, conf->du_stats[i].avail_percent,
+ conf->du_stats[i].avail_space,
+ conf->du_stats[i].avail_inodes);
+ break; /* no point in looping further */
+ }
+ }
+ UNLOCK(&conf->subvolume_lock);
out:
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt))
- DHT_STACK_DESTROY (frame);
+ this_call_cnt = dht_frame_return(frame);
+ if (is_last_call(this_call_cnt))
+ DHT_STACK_DESTROY(frame);
- return 0;
+ return 0;
}
int
-dht_get_du_info_for_subvol (xlator_t *this, int subvol_idx)
+dht_get_du_info_for_subvol(xlator_t *this, int subvol_idx)
{
- dht_conf_t *conf = NULL;
- call_frame_t *statfs_frame = NULL;
- dht_local_t *statfs_local = NULL;
- call_pool_t *pool = NULL;
- loc_t tmp_loc = {0,};
-
- conf = this->private;
- pool = this->ctx->pool;
-
- statfs_frame = create_frame (this, pool);
- if (!statfs_frame) {
- goto err;
- }
-
- /* local->fop value is not used in this case */
- statfs_local = dht_local_init (statfs_frame, NULL, NULL,
- GF_FOP_MAXVALUE);
- if (!statfs_local) {
- goto err;
- }
-
- /* make it root gfid, should be enough to get the proper info back */
- tmp_loc.gfid[15] = 1;
-
- statfs_local->call_cnt = 1;
- STACK_WIND (statfs_frame, dht_du_info_cbk,
- conf->subvolumes[subvol_idx],
- conf->subvolumes[subvol_idx]->fops->statfs,
- &tmp_loc, NULL);
-
- return 0;
+ dht_conf_t *conf = NULL;
+ call_frame_t *statfs_frame = NULL;
+ dht_local_t *statfs_local = NULL;
+ call_pool_t *pool = NULL;
+ loc_t tmp_loc = {
+ 0,
+ };
+
+ conf = this->private;
+ pool = this->ctx->pool;
+
+ statfs_frame = create_frame(this, pool);
+ if (!statfs_frame) {
+ goto err;
+ }
+
+ /* local->fop value is not used in this case */
+ statfs_local = dht_local_init(statfs_frame, NULL, NULL, GF_FOP_MAXVALUE);
+ if (!statfs_local) {
+ goto err;
+ }
+
+ /* make it root gfid, should be enough to get the proper info back */
+ tmp_loc.gfid[15] = 1;
+
+ statfs_local->call_cnt = 1;
+ STACK_WIND_COOKIE(
+ statfs_frame, dht_du_info_cbk, conf->subvolumes[subvol_idx],
+ conf->subvolumes[subvol_idx],
+ conf->subvolumes[subvol_idx]->fops->statfs, &tmp_loc, NULL);
+
+ return 0;
err:
- if (statfs_frame)
- DHT_STACK_DESTROY (statfs_frame);
+ if (statfs_frame)
+ DHT_STACK_DESTROY(statfs_frame);
- return -1;
+ return -1;
}
int
-dht_get_du_info (call_frame_t *frame, xlator_t *this, loc_t *loc)
+dht_get_du_info(call_frame_t *frame, xlator_t *this, loc_t *loc)
{
- int i = 0;
- int ret = -1;
- dht_conf_t *conf = NULL;
- call_frame_t *statfs_frame = NULL;
- dht_local_t *statfs_local = NULL;
- struct timeval tv = {0,};
- loc_t tmp_loc = {0,};
-
- conf = this->private;
-
- gettimeofday (&tv, NULL);
-
- /* make it root gfid, should be enough to get the proper
- info back */
- tmp_loc.gfid[15] = 1;
-
- if (tv.tv_sec > (conf->refresh_interval
- + conf->last_stat_fetch.tv_sec)) {
-
- statfs_frame = copy_frame (frame);
- if (!statfs_frame) {
- goto err;
- }
-
- /* In this case, 'local->fop' is not used */
- statfs_local = dht_local_init (statfs_frame, loc, NULL,
- GF_FOP_MAXVALUE);
- if (!statfs_local) {
- goto err;
- }
-
- statfs_local->params = dict_new ();
- if (!statfs_local->params)
- goto err;
-
- ret = dict_set_int8 (statfs_local->params,
- GF_INTERNAL_IGNORE_DEEM_STATFS, 1);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "Failed to set "
- GF_INTERNAL_IGNORE_DEEM_STATFS" in dict");
- goto err;
- }
+ int i = 0;
+ int ret = -1;
+ dht_conf_t *conf = NULL;
+ call_frame_t *statfs_frame = NULL;
+ dht_local_t *statfs_local = NULL;
+ loc_t tmp_loc = {
+ 0,
+ };
+ time_t now;
+
+ conf = this->private;
+ now = gf_time();
+ /* make it root gfid, should be enough to get the proper
+ info back */
+ tmp_loc.gfid[15] = 1;
+
+ if (now > (conf->refresh_interval + conf->last_stat_fetch)) {
+ statfs_frame = copy_frame(frame);
+ if (!statfs_frame) {
+ goto err;
+ }
+
+ /* In this case, 'local->fop' is not used */
+ statfs_local = dht_local_init(statfs_frame, loc, NULL, GF_FOP_MAXVALUE);
+ if (!statfs_local) {
+ goto err;
+ }
- statfs_local->call_cnt = conf->subvolume_cnt;
- for (i = 0; i < conf->subvolume_cnt; i++) {
- STACK_WIND (statfs_frame, dht_du_info_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->statfs,
- &tmp_loc, statfs_local->params);
- }
-
- conf->last_stat_fetch.tv_sec = tv.tv_sec;
- }
- return 0;
+ statfs_local->params = dict_new();
+ if (!statfs_local->params)
+ goto err;
+
+ ret = dict_set_int8(statfs_local->params,
+ GF_INTERNAL_IGNORE_DEEM_STATFS, 1);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set " GF_INTERNAL_IGNORE_DEEM_STATFS " in dict");
+ goto err;
+ }
+
+ statfs_local->call_cnt = conf->subvolume_cnt;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ STACK_WIND_COOKIE(statfs_frame, dht_du_info_cbk,
+ conf->subvolumes[i], conf->subvolumes[i],
+ conf->subvolumes[i]->fops->statfs, &tmp_loc,
+ statfs_local->params);
+ }
+
+ conf->last_stat_fetch = now;
+ }
+ return 0;
err:
- if (statfs_frame)
- DHT_STACK_DESTROY (statfs_frame);
+ if (statfs_frame)
+ DHT_STACK_DESTROY(statfs_frame);
- return -1;
+ return -1;
}
-
gf_boolean_t
-dht_is_subvol_filled (xlator_t *this, xlator_t *subvol)
+dht_is_subvol_filled(xlator_t *this, xlator_t *subvol)
{
- int i = 0;
- dht_conf_t *conf = NULL;
- gf_boolean_t subvol_filled_inodes = _gf_false;
- gf_boolean_t subvol_filled_space = _gf_false;
- gf_boolean_t is_subvol_filled = _gf_false;
-
- conf = this->private;
-
- /* Check for values above specified percent or free disk */
- LOCK (&conf->subvolume_lock);
- {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (subvol == conf->subvolumes[i]) {
- if (conf->disk_unit == 'p') {
- if (conf->du_stats[i].avail_percent <
- conf->min_free_disk) {
- subvol_filled_space = _gf_true;
- break;
- }
-
- } else {
- if (conf->du_stats[i].avail_space <
- conf->min_free_disk) {
- subvol_filled_space = _gf_true;
- break;
- }
- }
- if (conf->du_stats[i].avail_inodes <
- conf->min_free_inodes) {
- subvol_filled_inodes = _gf_true;
- break;
- }
- }
- }
- }
- UNLOCK (&conf->subvolume_lock);
-
- if (subvol_filled_space && conf->subvolume_status[i]) {
- if (!(conf->du_stats[i].log++ % (GF_UNIVERSAL_ANSWER * 10))) {
- gf_msg (this->name, GF_LOG_WARNING, 0,
- DHT_MSG_SUBVOL_INSUFF_SPACE,
- "disk space on subvolume '%s' is getting "
- "full (%.2f %%), consider adding more bricks",
- subvol->name,
- (100 - conf->du_stats[i].avail_percent));
- }
- }
-
- if (subvol_filled_inodes && conf->subvolume_status[i]) {
- if (!(conf->du_stats[i].log++ % (GF_UNIVERSAL_ANSWER * 10))) {
- gf_msg (this->name, GF_LOG_CRITICAL, 0,
- DHT_MSG_SUBVOL_INSUFF_INODES,
- "inodes on subvolume '%s' are at "
- "(%.2f %%), consider adding more bricks",
- subvol->name,
- (100 - conf->du_stats[i].avail_inodes));
- }
- }
-
- is_subvol_filled = (subvol_filled_space || subvol_filled_inodes);
-
- return is_subvol_filled;
-}
+ int i = 0;
+ char vol_name[256];
+ dht_conf_t *conf = NULL;
+ gf_boolean_t subvol_filled_inodes = _gf_false;
+ gf_boolean_t subvol_filled_space = _gf_false;
+ gf_boolean_t is_subvol_filled = _gf_false;
+ double usage = 0;
+
+ conf = this->private;
+
+ /* Check for values above specified percent or free disk */
+ LOCK(&conf->subvolume_lock);
+ {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (subvol == conf->subvolumes[i]) {
+ if (conf->disk_unit == 'p') {
+ if (conf->du_stats[i].avail_percent < conf->min_free_disk) {
+ subvol_filled_space = _gf_true;
+ break;
+ }
+
+ } else {
+ if (conf->du_stats[i].avail_space < conf->min_free_disk) {
+ subvol_filled_space = _gf_true;
+ break;
+ }
+ }
+ if (conf->du_stats[i].avail_inodes < conf->min_free_inodes) {
+ subvol_filled_inodes = _gf_true;
+ break;
+ }
+ }
+ }
+ }
+ UNLOCK(&conf->subvolume_lock);
+ if (subvol_filled_space && conf->subvolume_status[i]) {
+ if (!(conf->du_stats[i].log++ % (GF_UNIVERSAL_ANSWER * 10))) {
+ usage = 100 - conf->du_stats[i].avail_percent;
-/*Get the best subvolume to create the file in*/
-xlator_t *
-dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol,
- dht_local_t *local)
-{
- xlator_t *avail_subvol = NULL;
- dht_conf_t *conf = NULL;
- dht_layout_t *layout = NULL;
- loc_t *loc = NULL;
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_SUBVOL_INSUFF_SPACE,
+ "disk space on subvolume '%s' is getting "
+ "full (%.2f %%), consider adding more bricks",
+ subvol->name, usage);
- conf = this->private;
- if (!local)
- goto out;
- loc = &local->loc;
- if (!local->layout) {
- layout = dht_layout_get (this, loc->parent);
-
- if (!layout) {
- gf_msg_debug (this->name, 0,
- "Missing layout. path=%s,"
- " parent gfid = %s", loc->path,
- uuid_utoa (loc->parent->gfid));
- goto out;
- }
- } else {
- layout = dht_layout_ref (this, local->layout);
+ (void)snprintf(vol_name, sizeof(vol_name), "%s", this->name);
+ vol_name[(strlen(this->name) - 4)] = '\0';
+
+ gf_event(EVENT_DHT_DISK_USAGE, "volume=%s;subvol=%s;usage=%.2f %%",
+ vol_name, subvol->name, usage);
}
+ }
+
+ if (subvol_filled_inodes && conf->subvolume_status[i]) {
+ if (!(conf->du_stats[i].log++ % (GF_UNIVERSAL_ANSWER * 10))) {
+ usage = 100 - conf->du_stats[i].avail_inodes;
+ gf_msg(this->name, GF_LOG_CRITICAL, 0, DHT_MSG_SUBVOL_INSUFF_INODES,
+ "inodes on subvolume '%s' are at "
+ "(%.2f %%), consider adding more bricks",
+ subvol->name, usage);
+
+ (void)snprintf(vol_name, sizeof(vol_name), "%s", this->name);
+ vol_name[(strlen(this->name) - 4)] = '\0';
+
+ gf_event(EVENT_DHT_INODES_USAGE,
+ "volume=%s;subvol=%s;usage=%.2f %%", vol_name,
+ subvol->name, usage);
+ }
+ }
- LOCK (&conf->subvolume_lock);
- {
- avail_subvol = dht_subvol_with_free_space_inodes(this, subvol,
- layout);
- if(!avail_subvol)
- {
- avail_subvol = dht_subvol_maxspace_nonzeroinode(this,
- subvol,
- layout);
- }
+ is_subvol_filled = (subvol_filled_space || subvol_filled_inodes);
+
+ return is_subvol_filled;
+}
- }
- UNLOCK (&conf->subvolume_lock);
+/*Get the best subvolume to create the file in*/
+xlator_t *
+dht_free_disk_available_subvol(xlator_t *this, xlator_t *subvol,
+ dht_local_t *local)
+{
+ xlator_t *avail_subvol = NULL;
+ dht_conf_t *conf = NULL;
+ dht_layout_t *layout = NULL;
+ loc_t *loc = NULL;
+
+ conf = this->private;
+ if (!local)
+ goto out;
+ loc = &local->loc;
+ if (!local->layout) {
+ layout = dht_layout_get(this, loc->parent);
+
+ if (!layout) {
+ gf_msg_debug(this->name, 0,
+ "Missing layout. path=%s,"
+ " parent gfid = %s",
+ loc->path, uuid_utoa(loc->parent->gfid));
+ goto out;
+ }
+ } else {
+ layout = dht_layout_ref(this, local->layout);
+ }
+
+ LOCK(&conf->subvolume_lock);
+ {
+ avail_subvol = dht_subvol_with_free_space_inodes(this, subvol, NULL,
+ layout, 0);
+ if (!avail_subvol) {
+ avail_subvol = dht_subvol_maxspace_nonzeroinode(this, subvol,
+ layout);
+ }
+ }
+ UNLOCK(&conf->subvolume_lock);
out:
- if (!avail_subvol) {
- gf_msg_debug (this->name, 0,
- "No subvolume has enough free space \
+ if (!avail_subvol) {
+ gf_msg_debug(this->name, 0,
+ "No subvolume has enough free space \
and/or inodes to create");
- avail_subvol = subvol;
- }
+ avail_subvol = subvol;
+ }
- if (layout)
- dht_layout_unref (this, layout);
- return avail_subvol;
+ if (layout)
+ dht_layout_unref(this, layout);
+ return avail_subvol;
}
-static inline
-int32_t dht_subvol_has_err (xlator_t *this, dht_layout_t *layout)
+static inline int32_t
+dht_subvol_has_err(dht_conf_t *conf, xlator_t *this, xlator_t *ignore,
+ dht_layout_t *layout)
{
- int ret = -1;
- int i = 0;
+ int ret = -1;
+ int i = 0;
+
+ if (!this || !layout)
+ goto out;
+
+ /* this check is meant for rebalance process. The source of the file
+ * should be ignored for space check */
+ if (this == ignore) {
+ goto out;
+ }
+
+ /* check if subvol has layout errors, before selecting it */
+ for (i = 0; i < layout->cnt; i++) {
+ if (!strcmp(layout->list[i].xlator->name, this->name) &&
+ (layout->list[i].err != 0)) {
+ ret = -1;
+ goto out;
+ }
+ }
- if (!this || !layout)
+ /* discard decommissioned subvol */
+ if (conf->decommission_subvols_cnt) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->decommissioned_bricks[i] &&
+ conf->decommissioned_bricks[i] == this) {
+ ret = -1;
goto out;
-
- /* check if subvol has layout errors, before selecting it */
- for (i = 0; i < layout->cnt; i++) {
- if (!strcmp (layout->list[i].xlator->name, this->name) &&
- (layout->list[i].err != 0)) {
- ret = -1;
- goto out;
- }
+ }
}
- ret = 0;
+ }
+
+ ret = 0;
out:
- return ret;
+ return ret;
}
/*Get subvolume which has both space and inodes more than the min criteria*/
xlator_t *
dht_subvol_with_free_space_inodes(xlator_t *this, xlator_t *subvol,
- dht_layout_t *layout)
+ xlator_t *ignore, dht_layout_t *layout,
+ uint64_t filesize)
{
- int i = 0;
- double max = 0;
- double max_inodes = 0;
- int ignore_subvol = 0;
-
- xlator_t *avail_subvol = NULL;
- dht_conf_t *conf = NULL;
-
- conf = this->private;
-
- for(i=0; i < conf->subvolume_cnt; i++) {
- /* check if subvol has layout errors, before selecting it */
- ignore_subvol = dht_subvol_has_err (conf->subvolumes[i],
- layout);
- if (ignore_subvol)
- continue;
-
- if ((conf->disk_unit == 'p') &&
- (conf->du_stats[i].avail_percent > conf->min_free_disk) &&
- (conf->du_stats[i].avail_inodes > conf->min_free_inodes)) {
- if ((conf->du_stats[i].avail_inodes > max_inodes) ||
- (conf->du_stats[i].avail_percent > max)) {
- max = conf->du_stats[i].avail_percent;
- max_inodes = conf->du_stats[i].avail_inodes;
- avail_subvol = conf->subvolumes[i];
- }
- }
+ int i = 0;
+ double max = 0;
+ double max_inodes = 0;
+ int ignore_subvol = 0;
+ uint64_t total_blocks = 0;
+ uint64_t avail_blocks = 0;
+ uint64_t frsize = 0;
+ double post_availspace = 0;
+ double post_percent = 0;
+
+ xlator_t *avail_subvol = NULL;
+ dht_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ /* check if subvol has layout errors and also it is not a
+ * decommissioned brick, before selecting it */
+ ignore_subvol = dht_subvol_has_err(conf, conf->subvolumes[i], ignore,
+ layout);
+ if (ignore_subvol)
+ continue;
+
+ if ((conf->disk_unit == 'p') &&
+ (conf->du_stats[i].avail_percent > conf->min_free_disk) &&
+ (conf->du_stats[i].avail_inodes > conf->min_free_inodes)) {
+ if ((conf->du_stats[i].avail_inodes > max_inodes) ||
+ (conf->du_stats[i].avail_percent > max)) {
+ max = conf->du_stats[i].avail_percent;
+ max_inodes = conf->du_stats[i].avail_inodes;
+ avail_subvol = conf->subvolumes[i];
+ total_blocks = conf->du_stats[i].total_blocks;
+ avail_blocks = conf->du_stats[i].avail_blocks;
+ frsize = conf->du_stats[i].frsize;
+ }
+ }
- if ((conf->disk_unit != 'p') &&
- (conf->du_stats[i].avail_space > conf->min_free_disk) &&
- (conf->du_stats[i].avail_inodes > conf->min_free_inodes)) {
- if ((conf->du_stats[i].avail_inodes > max_inodes) ||
- (conf->du_stats[i].avail_space > max)) {
- max = conf->du_stats[i].avail_space;
- max_inodes = conf->du_stats[i].avail_inodes;
- avail_subvol = conf->subvolumes[i];
- }
- }
+ if ((conf->disk_unit != 'p') &&
+ (conf->du_stats[i].avail_space > conf->min_free_disk) &&
+ (conf->du_stats[i].avail_inodes > conf->min_free_inodes)) {
+ if ((conf->du_stats[i].avail_inodes > max_inodes) ||
+ (conf->du_stats[i].avail_space > max)) {
+ max = conf->du_stats[i].avail_space;
+ max_inodes = conf->du_stats[i].avail_inodes;
+ avail_subvol = conf->subvolumes[i];
+ }
+ }
+ }
+
+ if (avail_subvol) {
+ if (conf->disk_unit == 'p') {
+ post_availspace = (avail_blocks * frsize) - filesize;
+ post_percent = (post_availspace * 100) / (total_blocks * frsize);
+ if (post_percent < conf->min_free_disk)
+ avail_subvol = NULL;
}
+ if (conf->disk_unit != 'p') {
+ if ((max - filesize) < conf->min_free_disk)
+ avail_subvol = NULL;
+ }
+ }
- return avail_subvol;
+ return avail_subvol;
}
-
-/* Get subvol which has atleast one inode and maximum space */
+/* Get subvol which has at least one inode and maximum space */
xlator_t *
-dht_subvol_maxspace_nonzeroinode (xlator_t *this, xlator_t *subvol,
- dht_layout_t *layout)
+dht_subvol_maxspace_nonzeroinode(xlator_t *this, xlator_t *subvol,
+ dht_layout_t *layout)
{
- int i = 0;
- double max = 0;
- int ignore_subvol = 0;
-
- xlator_t *avail_subvol = NULL;
- dht_conf_t *conf = NULL;
-
- conf = this->private;
-
- for (i = 0; i < conf->subvolume_cnt; i++) {
- /* check if subvol has layout errors, before selecting it */
- ignore_subvol = dht_subvol_has_err (conf->subvolumes[i],
- layout);
- if (ignore_subvol)
- continue;
-
- if (conf->disk_unit == 'p') {
- if ((conf->du_stats[i].avail_percent > max)
- && (conf->du_stats[i].avail_inodes > 0 )) {
- max = conf->du_stats[i].avail_percent;
- avail_subvol = conf->subvolumes[i];
- }
- } else {
- if ((conf->du_stats[i].avail_space > max)
- && (conf->du_stats[i].avail_inodes > 0)) {
- max = conf->du_stats[i].avail_space;
- avail_subvol = conf->subvolumes[i];
- }
- }
+ int i = 0;
+ double max = 0;
+ int ignore_subvol = 0;
+
+ xlator_t *avail_subvol = NULL;
+ dht_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ /* check if subvol has layout errors and also it is not a
+ * decommissioned brick, before selecting it*/
+
+ ignore_subvol = dht_subvol_has_err(conf, conf->subvolumes[i], NULL,
+ layout);
+ if (ignore_subvol)
+ continue;
+
+ if (conf->disk_unit == 'p') {
+ if ((conf->du_stats[i].avail_percent > max) &&
+ (conf->du_stats[i].avail_inodes > 0)) {
+ max = conf->du_stats[i].avail_percent;
+ avail_subvol = conf->subvolumes[i];
+ }
+ } else {
+ if ((conf->du_stats[i].avail_space > max) &&
+ (conf->du_stats[i].avail_inodes > 0)) {
+ max = conf->du_stats[i].avail_space;
+ avail_subvol = conf->subvolumes[i];
+ }
}
+ }
- return avail_subvol;
+ return avail_subvol;
}
diff --git a/xlators/cluster/dht/src/dht-hashfn.c b/xlators/cluster/dht/src/dht-hashfn.c
index 72b3df022da..acda67c312a 100644
--- a/xlators/cluster/dht/src/dht-hashfn.c
+++ b/xlators/cluster/dht/src/dht-hashfn.c
@@ -8,104 +8,103 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-
-#include "glusterfs.h"
-#include "xlator.h"
#include "dht-common.h"
-#include "hashfn.h"
+#include <glusterfs/hashfn.h>
-
-int
-dht_hash_compute_internal (int type, const char *name, uint32_t *hash_p)
+static int
+dht_hash_compute_internal(int type, const char *name, const int len,
+ uint32_t *hash_p)
{
- int ret = 0;
- uint32_t hash = 0;
+ int ret = 0;
+ uint32_t hash = 0;
- switch (type) {
+ switch (type) {
case DHT_HASH_TYPE_DM:
case DHT_HASH_TYPE_DM_USER:
- hash = gf_dm_hashfn (name, strlen (name));
- break;
+ hash = gf_dm_hashfn(name, len);
+ break;
default:
- ret = -1;
- break;
- }
+ ret = -1;
+ break;
+ }
- if (ret == 0) {
- *hash_p = hash;
- }
+ if (ret == 0) {
+ *hash_p = hash;
+ }
- return ret;
+ return ret;
}
-
-static inline
-gf_boolean_t
-dht_munge_name (const char *original, char *modified, size_t len, regex_t *re)
+/* The function returns:
+ * 0 : in case no munge took place
+ * >0 : the length (inc. terminating NULL!) of the newly modified string,
+ * if it was munged.
+ */
+static int
+dht_munge_name(const char *original, char *modified, size_t len, regex_t *re)
{
- regmatch_t matches[2];
- size_t new_len;
-
- if (regexec(re,original,2,matches,0) != REG_NOMATCH) {
- if (matches[1].rm_so != -1) {
- new_len = matches[1].rm_eo - matches[1].rm_so;
- /* Equal would fail due to the NUL at the end. */
- if (new_len < len) {
- memcpy (modified,original+matches[1].rm_so,
- new_len);
- modified[new_len] = '\0';
- return _gf_true;
- }
- }
+ regmatch_t matches[2] = {
+ {0},
+ };
+ size_t new_len = 0;
+ int ret = 0;
+
+ ret = regexec(re, original, 2, matches, 0);
+
+ if (ret != REG_NOMATCH) {
+ if (matches[1].rm_so != -1) {
+ new_len = matches[1].rm_eo - matches[1].rm_so;
+ /* Equal would fail due to the NUL at the end. */
+ if (new_len < len) {
+ memcpy(modified, original + matches[1].rm_so, new_len);
+ modified[new_len] = '\0';
+ return new_len + 1; /* +1 for the terminating NULL */
+ }
}
+ }
- /* This is guaranteed safe because of how the dest was allocated. */
- strcpy(modified,original);
- return _gf_false;
+ /* This is guaranteed safe because of how the dest was allocated. */
+ strcpy(modified, original);
+ return 0;
}
int
-dht_hash_compute (xlator_t *this, int type, const char *name, uint32_t *hash_p)
+dht_hash_compute(xlator_t *this, int type, const char *name, uint32_t *hash_p)
{
- char *rsync_friendly_name = NULL;
- dht_conf_t *priv = this->private;
- size_t len = 0;
- gf_boolean_t munged = _gf_false;
-
- /*
- * It wouldn't be safe to use alloca in an inline function that doesn't
- * actually get inlined, and it wouldn't be efficient to do a real
- * allocation, so we use alloca here (if needed) and pass that to the
- * inline.
- */
+ char *rsync_friendly_name = NULL;
+ dht_conf_t *priv = NULL;
+ size_t len = 0;
+ int munged = 0;
+ priv = this->private;
+
+ if (name == NULL)
+ return -1;
+
+ len = strlen(name) + 1;
+ rsync_friendly_name = alloca(len);
+
+ LOCK(&priv->lock);
+ {
if (priv->extra_regex_valid) {
- len = strlen(name) + 1;
- rsync_friendly_name = alloca(len);
- munged = dht_munge_name (name, rsync_friendly_name, len,
- &priv->extra_regex);
+ munged = dht_munge_name(name, rsync_friendly_name, len,
+ &priv->extra_regex);
}
if (!munged && priv->rsync_regex_valid) {
- len = strlen(name) + 1;
- rsync_friendly_name = alloca(len);
- gf_msg_trace (this->name, 0, "trying regex for %s", name);
- munged = dht_munge_name (name, rsync_friendly_name, len,
- &priv->rsync_regex);
- if (munged) {
- gf_msg_debug (this->name, 0,
- "munged down to %s", rsync_friendly_name);
- }
+ gf_msg_trace(this->name, 0, "trying regex for %s", name);
+ munged = dht_munge_name(name, rsync_friendly_name, len,
+ &priv->rsync_regex);
}
-
- if (!munged) {
- rsync_friendly_name = (char *)name;
- }
-
- return dht_hash_compute_internal (type, rsync_friendly_name, hash_p);
+ }
+ UNLOCK(&priv->lock);
+ if (munged) {
+ gf_msg_debug(this->name, 0, "munged down to %s", rsync_friendly_name);
+ len = munged;
+ } else {
+ rsync_friendly_name = (char *)name;
+ }
+
+ return dht_hash_compute_internal(type, rsync_friendly_name, len - 1,
+ hash_p);
}
diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c
index cab66017b84..3f2fe43d5f3 100644
--- a/xlators/cluster/dht/src/dht-helper.c
+++ b/xlators/cluster/dht/src/dht-helper.c
@@ -8,1747 +8,2297 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-
-#include "glusterfs.h"
-#include "xlator.h"
#include "dht-common.h"
-#include "dht-helper.h"
+#include "dht-lock.h"
+#include "glusterfs/compat-errno.h" // for ENODATA on BSD
-static inline int
-dht_inode_ctx_set1 (xlator_t *this, inode_t *inode, xlator_t *subvol)
+static void
+dht_free_fd_ctx(dht_fd_ctx_t *fd_ctx)
{
- uint64_t tmp_subvol = 0;
-
- tmp_subvol = (long)subvol;
- return inode_ctx_set1 (inode, this, &tmp_subvol);
+ GF_FREE(fd_ctx);
}
-int
-dht_inode_ctx_get1 (xlator_t *this, inode_t *inode, xlator_t **subvol)
+int32_t
+dht_fd_ctx_destroy(xlator_t *this, fd_t *fd)
{
- int ret = -1;
- uint64_t tmp_subvol = 0;
-
- ret = inode_ctx_get1 (inode, this, &tmp_subvol);
- if (tmp_subvol && subvol)
- *subvol = (xlator_t *)tmp_subvol;
-
- return ret;
+ dht_fd_ctx_t *fd_ctx = NULL;
+ uint64_t value = 0;
+ int32_t ret = -1;
+
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, fd, out);
+
+ ret = fd_ctx_del(fd, this, &value);
+ if (ret) {
+ goto out;
+ }
+
+ fd_ctx = (dht_fd_ctx_t *)(uintptr_t)value;
+ if (fd_ctx) {
+ GF_REF_PUT(fd_ctx);
+ }
+out:
+ return ret;
}
-
-int
-dht_frame_return (call_frame_t *frame)
+static int
+__dht_fd_ctx_set(xlator_t *this, fd_t *fd, xlator_t *dst)
{
- dht_local_t *local = NULL;
- int this_call_cnt = -1;
+ dht_fd_ctx_t *fd_ctx = NULL;
+ uint64_t value = 0;
+ int ret = -1;
- if (!frame)
- return -1;
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, fd, out);
- local = frame->local;
+ fd_ctx = GF_CALLOC(1, sizeof(*fd_ctx), gf_dht_mt_fd_ctx_t);
- LOCK (&frame->lock);
- {
- this_call_cnt = --local->call_cnt;
- }
- UNLOCK (&frame->lock);
+ if (!fd_ctx) {
+ goto out;
+ }
- return this_call_cnt;
-}
+ fd_ctx->opened_on_dst = (uint64_t)(uintptr_t)dst;
+ GF_REF_INIT(fd_ctx, dht_free_fd_ctx);
-/*
- * A slightly "updated" version of the algorithm described in the commit log
- * is used here.
- *
- * The only enhancement is that:
- *
- * - The number of bits used by the backend filesystem for HUGE d_off which
- * is described as 63, and
- * - The number of bits used by the d_off presented by the transformation
- * upwards which is described as 64, are both made "configurable."
- */
+ value = (uint64_t)(uintptr_t)fd_ctx;
+
+ ret = __fd_ctx_set(fd, this, value);
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_FD_CTX_SET_FAILED,
+ "fd=0x%p", fd, NULL);
+ GF_REF_PUT(fd_ctx);
+ }
+out:
+ return ret;
+}
int
-dht_filter_loc_subvol_key (xlator_t *this, loc_t *loc, loc_t *new_loc,
- xlator_t **subvol)
+dht_fd_ctx_set(xlator_t *this, fd_t *fd, xlator_t *dst)
{
- char *new_name = NULL;
- char *new_path = NULL;
- xlator_list_t *trav = NULL;
- char key[1024] = {0,};
- int ret = 0; /* not found */
-
- /* Why do other tasks if first required 'char' itself is not there */
- if (!new_loc || !loc || !loc->name || !strchr (loc->name, '@'))
- goto out;
+ dht_fd_ctx_t *fd_ctx = NULL;
+ uint64_t value = 0;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, fd, out);
+
+ LOCK(&fd->lock);
+ {
+ ret = __fd_ctx_get(fd, this, &value);
+ if (ret && value) {
+ fd_ctx = (dht_fd_ctx_t *)(uintptr_t)value;
+ if (fd_ctx->opened_on_dst == (uint64_t)(uintptr_t)dst) {
+ /* This could happen due to racing
+ * check_progress tasks*/
+ goto unlock;
+ } else {
+ /* This would be a big problem*/
+ /* Overwrite and hope for the best*/
+ fd_ctx->opened_on_dst = (uint64_t)(uintptr_t)dst;
+ UNLOCK(&fd->lock);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_INVALID_VALUE,
+ NULL);
- trav = this->children;
- while (trav) {
- snprintf (key, 1024, "*@%s:%s", this->name, trav->xlator->name);
- if (fnmatch (key, loc->name, FNM_NOESCAPE) == 0) {
- new_name = GF_CALLOC(strlen (loc->name),
- sizeof (char),
- gf_common_mt_char);
- if (!new_name)
- goto out;
- if (fnmatch (key, loc->path, FNM_NOESCAPE) == 0) {
- new_path = GF_CALLOC(strlen (loc->path),
- sizeof (char),
- gf_common_mt_char);
- if (!new_path)
- goto out;
- strncpy (new_path, loc->path, (strlen (loc->path) -
- strlen (key) + 1));
- }
- strncpy (new_name, loc->name, (strlen (loc->name) -
- strlen (key) + 1));
-
- if (new_loc) {
- new_loc->path = ((new_path) ? new_path:
- gf_strdup (loc->path));
- new_loc->name = new_name;
- new_loc->inode = inode_ref (loc->inode);
- new_loc->parent = inode_ref (loc->parent);
- }
- *subvol = trav->xlator;
- ret = 1; /* success */
- goto out;
- }
- trav = trav->next;
+ goto out;
+ }
}
+ ret = __dht_fd_ctx_set(this, fd, dst);
+ }
+unlock:
+ UNLOCK(&fd->lock);
out:
- if (!ret) {
- /* !success */
- GF_FREE (new_path);
- GF_FREE (new_name);
+ return ret;
+}
+
+static dht_fd_ctx_t *
+dht_fd_ctx_get(xlator_t *this, fd_t *fd)
+{
+ dht_fd_ctx_t *fd_ctx = NULL;
+ int ret = -1;
+ uint64_t tmp_val = 0;
+
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, fd, out);
+
+ LOCK(&fd->lock);
+ {
+ ret = __fd_ctx_get(fd, this, &tmp_val);
+ if ((ret < 0) || (tmp_val == 0)) {
+ goto unlock;
}
- return ret;
+
+ fd_ctx = (dht_fd_ctx_t *)(uintptr_t)tmp_val;
+ GF_REF_GET(fd_ctx);
+ }
+unlock:
+ UNLOCK(&fd->lock);
+
+out:
+ return fd_ctx;
}
-static xlator_t *
-dht_get_subvol_from_id(xlator_t *this, int client_id)
+gf_boolean_t
+dht_fd_open_on_dst(xlator_t *this, fd_t *fd, xlator_t *dst)
{
- xlator_t *xl = NULL;
- dht_conf_t *conf = NULL;
- char sid[6] = { 0 };
+ dht_fd_ctx_t *fd_ctx = NULL;
+ gf_boolean_t opened = _gf_false;
- conf = this->private;
+ fd_ctx = dht_fd_ctx_get(this, fd);
- sprintf(sid, "%d", client_id);
- if (dict_get_ptr(conf->leaf_to_subvol, sid, (void **) &xl))
- xl = NULL;
+ if (fd_ctx) {
+ if (fd_ctx->opened_on_dst == (uint64_t)(uintptr_t)dst) {
+ opened = _gf_true;
+ }
+ GF_REF_PUT(fd_ctx);
+ }
- return xl;
+ return opened;
}
-int
-dht_deitransform (xlator_t *this, uint64_t y, xlator_t **subvol_p)
+void
+dht_free_mig_info(void *data)
{
- int client_id = 0;
- xlator_t *subvol = 0;
- dht_conf_t *conf = NULL;
+ dht_migrate_info_t *miginfo = NULL;
+
+ miginfo = data;
+ GF_FREE(miginfo);
- if (!this->private)
- return -1;
+ return;
+}
- conf = this->private;
+static int
+dht_inode_ctx_set_mig_info(xlator_t *this, inode_t *inode, xlator_t *src_subvol,
+ xlator_t *dst_subvol)
+{
+ dht_migrate_info_t *miginfo = NULL;
+ uint64_t value = 0;
+ int ret = -1;
- client_id = gf_deitransform(this, y);
+ miginfo = GF_CALLOC(1, sizeof(*miginfo), gf_dht_mt_miginfo_t);
+ if (miginfo == NULL)
+ goto out;
- subvol = dht_get_subvol_from_id(this, client_id);
+ miginfo->src_subvol = src_subvol;
+ miginfo->dst_subvol = dst_subvol;
+ GF_REF_INIT(miginfo, dht_free_mig_info);
- if (!subvol)
- subvol = conf->subvolumes[0];
+ value = (uint64_t)(uintptr_t)miginfo;
- if (subvol_p)
- *subvol_p = subvol;
+ ret = inode_ctx_set1(inode, this, &value);
+ if (ret < 0) {
+ GF_REF_PUT(miginfo);
+ }
- return 0;
+out:
+ return ret;
}
-char *
-dht_lock_asprintf (dht_lock_t *lock)
+int
+dht_inode_ctx_get_mig_info(xlator_t *this, inode_t *inode,
+ xlator_t **src_subvol, xlator_t **dst_subvol)
{
- char *lk_buf = NULL;
- char gfid[GF_UUID_BUF_SIZE] = {0, };
+ int ret = -1;
+ uint64_t tmp_miginfo = 0;
+ dht_migrate_info_t *miginfo = NULL;
- if (lock == NULL)
- goto out;
+ LOCK(&inode->lock);
+ {
+ ret = __inode_ctx_get1(inode, this, &tmp_miginfo);
+ if ((ret < 0) || (tmp_miginfo == 0)) {
+ UNLOCK(&inode->lock);
+ goto out;
+ }
- uuid_utoa_r (lock->loc.gfid, gfid);
+ miginfo = (dht_migrate_info_t *)(uintptr_t)tmp_miginfo;
+ GF_REF_GET(miginfo);
+ }
+ UNLOCK(&inode->lock);
- gf_asprintf (&lk_buf, "%s:%s", lock->xl->name, gfid);
+ if (src_subvol)
+ *src_subvol = miginfo->src_subvol;
+
+ if (dst_subvol)
+ *dst_subvol = miginfo->dst_subvol;
+
+ GF_REF_PUT(miginfo);
out:
- return lk_buf;
+ return ret;
}
-void
-dht_log_lk_array (char *name, gf_loglevel_t log_level, dht_lock_t **lk_array,
- int count)
+gf_boolean_t
+dht_mig_info_is_invalid(xlator_t *current, xlator_t *src_subvol,
+ xlator_t *dst_subvol)
{
- int i = 0;
- char *lk_buf = NULL;
+ /* Not set
+ */
+ if (!src_subvol || !dst_subvol)
+ return _gf_true;
+
+ /* Invalid scenarios:
+ * The src_subvol does not match the subvol on which the current op was sent
+ * so the cached subvol has changed between the last mig_info_set and now.
+ * src_subvol == dst_subvol. The file was migrated without any FOP detecting
+ * a P2 so the old dst is now the current subvol.
+ *
+ * There is still one scenario where the info could be outdated - if
+ * file has undergone multiple migrations and ends up on the same src_subvol
+ * on which the mig_info was first set.
+ */
+ if ((current == dst_subvol) || (current != src_subvol))
+ return _gf_true;
+
+ return _gf_false;
+}
- if ((lk_array == NULL) || (count == 0))
- goto out;
+/* Used to check if fd fops have the fd opened on the cached subvol
+ * This is required when:
+ * 1. an fd is opened on FILE1 on subvol1
+ * 2. the file is migrated to subvol2
+ * 3. a lookup updates the cached subvol in the inode_ctx to subvol2
+ * 4. a write comes on the fd
+ * The write is sent to subvol2 on an fd which has been opened only on fd1
+ * Since the migration phase checks don't kick in, the fop fails with EBADF
+ *
+ */
- for (i = 0; i < count; i++) {
- lk_buf = dht_lock_asprintf (lk_array[i]);
- gf_log (name, log_level, "%d. %s", i, lk_buf);
- GF_FREE (lk_buf);
- }
+int
+dht_check_and_open_fd_on_subvol_complete(int ret, call_frame_t *frame,
+ void *data)
+{
+ glusterfs_fop_t fop = 0;
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ xlator_t *this = NULL;
+ fd_t *fd = NULL;
+ int op_errno = -1;
+
+ local = frame->local;
+ this = frame->this;
+ fop = local->fop;
+ subvol = local->cached_subvol;
+ fd = local->fd;
+
+ if (ret) {
+ op_errno = local->op_errno;
+ goto handle_err;
+ }
+
+ switch (fop) {
+ case GF_FOP_WRITE:
+ STACK_WIND_COOKIE(frame, dht_writev_cbk, subvol, subvol,
+ subvol->fops->writev, fd, local->rebalance.vector,
+ local->rebalance.count, local->rebalance.offset,
+ local->rebalance.flags, local->rebalance.iobref,
+ local->xattr_req);
+ break;
+
+ case GF_FOP_FLUSH:
+ STACK_WIND(frame, dht_flush_cbk, subvol, subvol->fops->flush, fd,
+ local->xattr_req);
+ break;
+
+ case GF_FOP_FSETATTR:
+ STACK_WIND_COOKIE(frame, dht_file_setattr_cbk, subvol, subvol,
+ subvol->fops->fsetattr, fd,
+ &local->rebalance.stbuf, local->rebalance.flags,
+ local->xattr_req);
+ break;
+
+ case GF_FOP_ZEROFILL:
+ STACK_WIND_COOKIE(frame, dht_zerofill_cbk, subvol, subvol,
+ subvol->fops->zerofill, fd,
+ local->rebalance.offset, local->rebalance.size,
+ local->xattr_req);
+
+ break;
+
+ case GF_FOP_DISCARD:
+ STACK_WIND_COOKIE(frame, dht_discard_cbk, subvol, subvol,
+ subvol->fops->discard, local->fd,
+ local->rebalance.offset, local->rebalance.size,
+ local->xattr_req);
+ break;
+
+ case GF_FOP_FALLOCATE:
+ STACK_WIND_COOKIE(frame, dht_fallocate_cbk, subvol, subvol,
+ subvol->fops->fallocate, fd,
+ local->rebalance.flags, local->rebalance.offset,
+ local->rebalance.size, local->xattr_req);
+ break;
+
+ case GF_FOP_FTRUNCATE:
+ STACK_WIND_COOKIE(frame, dht_truncate_cbk, subvol, subvol,
+ subvol->fops->ftruncate, fd,
+ local->rebalance.offset, local->xattr_req);
+ break;
+
+ case GF_FOP_FSYNC:
+ STACK_WIND_COOKIE(frame, dht_fsync_cbk, subvol, subvol,
+ subvol->fops->fsync, local->fd,
+ local->rebalance.flags, local->xattr_req);
+ break;
+
+ case GF_FOP_READ:
+ STACK_WIND(frame, dht_readv_cbk, subvol, subvol->fops->readv,
+ local->fd, local->rebalance.size,
+ local->rebalance.offset, local->rebalance.flags,
+ local->xattr_req);
+ break;
+
+ case GF_FOP_FSTAT:
+ STACK_WIND_COOKIE(frame, dht_file_attr_cbk, subvol, subvol,
+ subvol->fops->fstat, fd, local->xattr_req);
+ break;
+
+ case GF_FOP_FSETXATTR:
+ STACK_WIND_COOKIE(frame, dht_file_setxattr_cbk, subvol, subvol,
+ subvol->fops->fsetxattr, local->fd,
+ local->rebalance.xattr, local->rebalance.flags,
+ local->xattr_req);
+ break;
+
+ case GF_FOP_FREMOVEXATTR:
+ STACK_WIND_COOKIE(frame, dht_file_removexattr_cbk, subvol, subvol,
+ subvol->fops->fremovexattr, local->fd, local->key,
+ local->xattr_req);
+
+ break;
+
+ case GF_FOP_FXATTROP:
+ STACK_WIND(frame, dht_common_xattrop_cbk, subvol,
+ subvol->fops->fxattrop, local->fd,
+ local->rebalance.flags, local->rebalance.xattr,
+ local->xattr_req);
+ break;
+
+ case GF_FOP_FGETXATTR:
+ STACK_WIND(frame, dht_getxattr_cbk, subvol, subvol->fops->fgetxattr,
+ local->fd, local->key, NULL);
+ break;
+
+ case GF_FOP_FINODELK:
+ STACK_WIND(frame, dht_finodelk_cbk, subvol, subvol->fops->finodelk,
+ local->key, local->fd, local->rebalance.lock_cmd,
+ &local->rebalance.flock, local->xattr_req);
+ break;
+ default:
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_UNKNOWN_FOP, "fd=%p",
+ fd, "gfid=%s", uuid_utoa(fd->inode->gfid), "name=%s",
+ subvol->name, NULL);
+ break;
+ }
+
+ goto out;
+
+ /* Could not open the fd on the dst. Unwind */
+
+handle_err:
+
+ switch (fop) {
+ case GF_FOP_WRITE:
+ DHT_STACK_UNWIND(writev, frame, -1, op_errno, NULL, NULL, NULL);
+ break;
+
+ case GF_FOP_FLUSH:
+ DHT_STACK_UNWIND(flush, frame, -1, op_errno, NULL);
+ break;
+
+ case GF_FOP_FSETATTR:
+ DHT_STACK_UNWIND(fsetattr, frame, -1, op_errno, NULL, NULL, NULL);
+ break;
+
+ case GF_FOP_ZEROFILL:
+ DHT_STACK_UNWIND(zerofill, frame, -1, op_errno, NULL, NULL, NULL);
+ break;
+
+ case GF_FOP_DISCARD:
+ DHT_STACK_UNWIND(discard, frame, -1, op_errno, NULL, NULL, NULL);
+ break;
+
+ case GF_FOP_FALLOCATE:
+ DHT_STACK_UNWIND(fallocate, frame, -1, op_errno, NULL, NULL, NULL);
+ break;
+
+ case GF_FOP_FTRUNCATE:
+ DHT_STACK_UNWIND(ftruncate, frame, -1, op_errno, NULL, NULL, NULL);
+ break;
+
+ case GF_FOP_FSYNC:
+ DHT_STACK_UNWIND(fsync, frame, -1, op_errno, NULL, NULL, NULL);
+ break;
+
+ case GF_FOP_READ:
+ DHT_STACK_UNWIND(readv, frame, -1, op_errno, NULL, 0, NULL, NULL,
+ NULL);
+ break;
+
+ case GF_FOP_FSTAT:
+ DHT_STACK_UNWIND(fstat, frame, -1, op_errno, NULL, NULL);
+ break;
+
+ case GF_FOP_FSETXATTR:
+ DHT_STACK_UNWIND(fsetxattr, frame, -1, op_errno, NULL);
+ break;
+
+ case GF_FOP_FREMOVEXATTR:
+ DHT_STACK_UNWIND(fremovexattr, frame, -1, op_errno, NULL);
+ break;
+
+ case GF_FOP_FXATTROP:
+ DHT_STACK_UNWIND(fxattrop, frame, -1, op_errno, NULL, NULL);
+ break;
+
+ case GF_FOP_FGETXATTR:
+ DHT_STACK_UNWIND(fgetxattr, frame, -1, op_errno, NULL, NULL);
+ break;
+
+ case GF_FOP_FINODELK:
+ DHT_STACK_UNWIND(finodelk, frame, -1, op_errno, NULL);
+ break;
+
+ default:
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_UNKNOWN_FOP, "fd=%p",
+ fd, "gfid=%s", uuid_utoa(fd->inode->gfid), "name=%s",
+ subvol->name, NULL);
+ break;
+ }
out:
- return;
+
+ return 0;
}
-void
-dht_lock_stack_destroy (call_frame_t *lock_frame)
+/* Check once again if the fd has been opened on the cached subvol.
+ * If not, open and update the fd_ctx.
+ */
+
+int
+dht_check_and_open_fd_on_subvol_task(void *data)
{
- dht_local_t *local = NULL;
+ loc_t loc = {
+ 0,
+ };
+ int ret = -1;
+ call_frame_t *frame = NULL;
+ dht_local_t *local = NULL;
+ fd_t *fd = NULL;
+ xlator_t *this = NULL;
+ xlator_t *subvol = NULL;
+
+ frame = data;
+ local = frame->local;
+ this = THIS;
+ fd = local->fd;
+ subvol = local->cached_subvol;
+
+ local->fd_checked = _gf_true;
+
+ if (fd_is_anonymous(fd) || dht_fd_open_on_dst(this, fd, subvol)) {
+ ret = 0;
+ goto out;
+ }
- local = lock_frame->local;
+ gf_msg_debug(this->name, 0, "Opening fd (%p, flags=0%o) on file %s @ %s",
+ fd, fd->flags, uuid_utoa(fd->inode->gfid), subvol->name);
- local->lock.locks = NULL;
- local->lock.lk_count = 0;
+ loc.inode = inode_ref(fd->inode);
+ gf_uuid_copy(loc.gfid, fd->inode->gfid);
- DHT_STACK_DESTROY (lock_frame);
- return;
+ /* Open this on the dst subvol */
+
+ SYNCTASK_SETID(0, 0);
+
+ ret = syncop_open(subvol, &loc, (fd->flags & ~(O_CREAT | O_EXCL | O_TRUNC)),
+ fd, NULL, NULL);
+
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_OPEN_FD_ON_DST_FAILED,
+ "fd=%p", fd, "flags=0%o", fd->flags, "gfid=%s",
+ uuid_utoa(fd->inode->gfid), "name=%s", subvol->name, NULL);
+ /* This can happen if the cached subvol was updated in the
+ * inode_ctx and the fd was opened on the new cached suvol
+ * after this fop was wound on the old cached subvol.
+ * As we do not close the fd on the old subvol (a leak)
+ * don't treat ENOENT as an error and allow the phase1/phase2
+ * checks to handle it.
+ */
+
+ if ((-ret != ENOENT) && (-ret != ESTALE)) {
+ local->op_errno = -ret;
+ ret = -1;
+ } else {
+ ret = 0;
+ }
+
+ local->op_errno = -ret;
+ ret = -1;
+
+ } else {
+ dht_fd_ctx_set(this, fd, subvol);
+ }
+
+ SYNCTASK_SETID(frame->root->uid, frame->root->gid);
+out:
+ loc_wipe(&loc);
+
+ return ret;
}
-void
-dht_lock_free (dht_lock_t *lock)
+int
+dht_check_and_open_fd_on_subvol(xlator_t *this, call_frame_t *frame)
{
- if (lock == NULL)
- goto out;
+ int ret = -1;
+ dht_local_t *local = NULL;
- loc_wipe (&lock->loc);
- GF_FREE (lock->domain);
- mem_put (lock);
+ /*
+ if (dht_fd_open_on_dst (this, fd, subvol))
+ goto out;
+ */
+ local = frame->local;
-out:
- return;
+ ret = synctask_new(this->ctx->env, dht_check_and_open_fd_on_subvol_task,
+ dht_check_and_open_fd_on_subvol_complete, frame, frame);
+
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SYNCTASK_CREATE_FAILED,
+ "to-check-and-open fd=%p", local->fd, NULL);
+ }
+
+ return ret;
}
-void
-dht_lock_array_free (dht_lock_t **lk_array, int count)
+int
+dht_frame_return(call_frame_t *frame)
{
- int i = 0;
- dht_lock_t *lock = NULL;
+ dht_local_t *local = NULL;
+ int this_call_cnt = -1;
- if (lk_array == NULL)
- goto out;
+ if (!frame)
+ return -1;
- for (i = 0; i < count; i++) {
- lock = lk_array[i];
- lk_array[i] = NULL;
- dht_lock_free (lock);
- }
+ local = frame->local;
+
+ LOCK(&frame->lock);
+ {
+ this_call_cnt = --local->call_cnt;
+ }
+ UNLOCK(&frame->lock);
+ return this_call_cnt;
+}
+
+/*
+ * Use this function to specify which subvol you want the file created
+ * on - this need not be the hashed subvol.
+ * Format: <filename>@<this->name>:<subvol-name>
+ * Eg: file-1@vol1-dht:vol1-client-0
+ * where vol1 is a pure distribute volume
+ * will create file-1 on vol1-client-0
+ */
+
+int
+dht_filter_loc_subvol_key(xlator_t *this, loc_t *loc, loc_t *new_loc,
+ xlator_t **subvol)
+{
+ char *new_name = NULL;
+ char *new_path = NULL;
+ xlator_list_t *trav = NULL;
+ char key[1024] = {
+ 0,
+ };
+ int ret = 0; /* not found */
+ int keylen = 0;
+ int name_len = 0;
+ int path_len = 0;
+
+ /* Why do other tasks if first required 'char' itself is not there */
+ if (!new_loc || !loc || !loc->name || !strchr(loc->name, '@')) {
+ /* Skip the GF_FREE checks here */
+ return ret;
+ }
+
+ trav = this->children;
+ while (trav) {
+ keylen = snprintf(key, sizeof(key), "*@%s:%s", this->name,
+ trav->xlator->name);
+ /* Ignore '*' */
+ keylen = keylen - 1;
+ if (fnmatch(key, loc->name, FNM_NOESCAPE) == 0) {
+ name_len = strlen(loc->name) - keylen;
+ new_name = GF_MALLOC(name_len + 1, gf_common_mt_char);
+ if (!new_name)
+ goto out;
+ if (fnmatch(key, loc->path, FNM_NOESCAPE) == 0) {
+ path_len = strlen(loc->path) - keylen;
+ new_path = GF_MALLOC(path_len + 1, gf_common_mt_char);
+ if (!new_path)
+ goto out;
+ snprintf(new_path, path_len + 1, "%s", loc->path);
+ }
+ snprintf(new_name, name_len + 1, "%s", loc->name);
+
+ if (new_loc) {
+ new_loc->path = ((new_path) ? new_path : gf_strdup(loc->path));
+ new_loc->name = new_name;
+ new_loc->inode = inode_ref(loc->inode);
+ new_loc->parent = inode_ref(loc->parent);
+ }
+ *subvol = trav->xlator;
+ ret = 1; /* success */
+ goto out;
+ }
+ trav = trav->next;
+ }
out:
- return;
+ if (!ret) {
+ /* !success */
+ GF_FREE(new_path);
+ GF_FREE(new_name);
+ }
+ return ret;
}
-dht_lock_t *
-dht_lock_new (xlator_t *this, xlator_t *xl, loc_t *loc, short type,
- const char *domain)
+static xlator_t *
+dht_get_subvol_from_id(xlator_t *this, int client_id)
{
- dht_conf_t *conf = NULL;
- dht_lock_t *lock = NULL;
+ xlator_t *xl = NULL;
+ dht_conf_t *conf = NULL;
+ char *sid = NULL;
+ int32_t ret = -1;
- conf = this->private;
+ conf = this->private;
- lock = mem_get0 (conf->lock_pool);
- if (lock == NULL)
- goto out;
+ ret = gf_asprintf(&sid, "%d", client_id);
+ if (ret == -1) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_ASPRINTF_FAILED, NULL);
+ goto out;
+ }
- lock->xl = xl;
- lock->type = type;
- lock->domain = gf_strdup (domain);
- if (lock->domain == NULL) {
- dht_lock_free (lock);
- lock = NULL;
- goto out;
- }
+ if (dict_get_ptr(conf->leaf_to_subvol, sid, (void **)&xl))
+ xl = NULL;
- /* Fill only inode and gfid.
- posix and protocol/server give preference to pargfid/basename over
- gfid/inode for resolution if all the three parameters of loc_t are
- present. I want to avoid the following hypothetical situation:
-
- 1. rebalance did a lookup on a dentry and got a gfid.
- 2. rebalance acquires lock on loc_t which was filled with gfid and
- path (pargfid/bname) from step 1.
- 3. somebody deleted and recreated the same file
- 4. rename on the same path acquires lock on loc_t which now points
- to a different inode (and hence gets the lock).
- 5. rebalance continues to migrate file (note that not all fops done
- by rebalance during migration are inode/gfid based Eg., unlink)
- 6. rename continues.
- */
- lock->loc.inode = inode_ref (loc->inode);
- loc_gfid (loc, lock->loc.gfid);
+ GF_FREE(sid);
out:
- return lock;
+ return xl;
}
int
-dht_local_lock_init (call_frame_t *frame, dht_lock_t **lk_array,
- int lk_count, fop_inodelk_cbk_t inodelk_cbk)
+dht_deitransform(xlator_t *this, uint64_t y, xlator_t **subvol_p)
{
- int ret = -1;
- dht_local_t *local = NULL;
+ int client_id = 0;
+ xlator_t *subvol = 0;
+ dht_conf_t *conf = NULL;
- local = frame->local;
+ if (!this->private)
+ return -1;
- if (local == NULL) {
- local = dht_local_init (frame, NULL, NULL, 0);
- }
+ conf = this->private;
- if (local == NULL) {
- goto out;
- }
+ client_id = gf_deitransform(this, y);
- local->lock.inodelk_cbk = inodelk_cbk;
- local->lock.locks = lk_array;
- local->lock.lk_count = lk_count;
+ subvol = dht_get_subvol_from_id(this, client_id);
- ret = dht_lock_order_requests (local->lock.locks,
- local->lock.lk_count);
- if (ret < 0)
- goto out;
+ if (!subvol)
+ subvol = conf->subvolumes[0];
- ret = 0;
-out:
- return ret;
+ if (subvol_p)
+ *subvol_p = subvol;
+
+ return 0;
}
void
-dht_local_wipe (xlator_t *this, dht_local_t *local)
+dht_local_wipe(xlator_t *this, dht_local_t *local)
{
- if (!local)
- return;
+ int i = 0;
- loc_wipe (&local->loc);
- loc_wipe (&local->loc2);
+ if (!local)
+ return;
- if (local->xattr)
- dict_unref (local->xattr);
+ loc_wipe(&local->loc);
+ loc_wipe(&local->loc2);
+ loc_wipe(&local->loc2_copy);
- if (local->inode)
- inode_unref (local->inode);
+ if (local->xattr)
+ dict_unref(local->xattr);
- if (local->layout) {
- dht_layout_unref (this, local->layout);
- local->layout = NULL;
- }
+ if (local->inode)
+ inode_unref(local->inode);
- loc_wipe (&local->linkfile.loc);
+ if (local->layout) {
+ dht_layout_unref(this, local->layout);
+ local->layout = NULL;
+ }
- if (local->linkfile.xattr)
- dict_unref (local->linkfile.xattr);
+ loc_wipe(&local->linkfile.loc);
- if (local->linkfile.inode)
- inode_unref (local->linkfile.inode);
+ if (local->linkfile.xattr)
+ dict_unref(local->linkfile.xattr);
- if (local->fd) {
- fd_unref (local->fd);
- local->fd = NULL;
- }
+ if (local->linkfile.inode)
+ inode_unref(local->linkfile.inode);
- if (local->params) {
- dict_unref (local->params);
- local->params = NULL;
- }
+ if (local->fd) {
+ fd_unref(local->fd);
+ local->fd = NULL;
+ }
- if (local->xattr_req)
- dict_unref (local->xattr_req);
+ if (local->params) {
+ dict_unref(local->params);
+ local->params = NULL;
+ }
- if (local->selfheal.layout) {
- dht_layout_unref (this, local->selfheal.layout);
- local->selfheal.layout = NULL;
- }
+ if (local->xattr_req)
+ dict_unref(local->xattr_req);
+ if (local->mds_xattr)
+ dict_unref(local->mds_xattr);
+ if (local->xdata)
+ dict_unref(local->xdata);
- dht_lock_array_free (local->lock.locks, local->lock.lk_count);
- GF_FREE (local->lock.locks);
+ if (local->selfheal.layout) {
+ dht_layout_unref(this, local->selfheal.layout);
+ local->selfheal.layout = NULL;
+ }
- GF_FREE (local->newpath);
+ if (local->selfheal.refreshed_layout) {
+ dht_layout_unref(this, local->selfheal.refreshed_layout);
+ local->selfheal.refreshed_layout = NULL;
+ }
- GF_FREE (local->key);
+ for (i = 0; i < 2; i++) {
+ dht_lock_array_free(local->lock[i].ns.parent_layout.locks,
+ local->lock[i].ns.parent_layout.lk_count);
- GF_FREE (local->rebalance.vector);
+ GF_FREE(local->lock[i].ns.parent_layout.locks);
- if (local->rebalance.iobref)
- iobref_unref (local->rebalance.iobref);
+ dht_lock_array_free(local->lock[i].ns.directory_ns.locks,
+ local->lock[i].ns.directory_ns.lk_count);
+ GF_FREE(local->lock[i].ns.directory_ns.locks);
+ }
- mem_put (local);
-}
+ GF_FREE(local->key);
+
+ if (local->rebalance.xdata)
+ dict_unref(local->rebalance.xdata);
+
+ if (local->rebalance.xattr)
+ dict_unref(local->rebalance.xattr);
+
+ if (local->rebalance.dict)
+ dict_unref(local->rebalance.dict);
+ GF_FREE(local->rebalance.vector);
+
+ if (local->rebalance.iobref)
+ iobref_unref(local->rebalance.iobref);
+
+ if (local->stub) {
+ call_stub_destroy(local->stub);
+ local->stub = NULL;
+ }
+
+ if (local->ret_cache)
+ GF_FREE(local->ret_cache);
+
+ mem_put(local);
+}
dht_local_t *
-dht_local_init (call_frame_t *frame, loc_t *loc, fd_t *fd, glusterfs_fop_t fop)
+dht_local_init(call_frame_t *frame, loc_t *loc, fd_t *fd, glusterfs_fop_t fop)
{
- dht_local_t *local = NULL;
- inode_t *inode = NULL;
- int ret = 0;
+ dht_local_t *local = NULL;
+ inode_t *inode = NULL;
+ int ret = 0;
- local = mem_get0 (THIS->local_pool);
- if (!local)
- goto out;
+ local = mem_get0(THIS->local_pool);
+ if (!local)
+ goto out;
- if (loc) {
- ret = loc_copy (&local->loc, loc);
- if (ret)
- goto out;
+ if (loc) {
+ ret = loc_copy(&local->loc, loc);
+ if (ret)
+ goto out;
- inode = loc->inode;
- }
+ inode = loc->inode;
+ }
- if (fd) {
- local->fd = fd_ref (fd);
- if (!inode)
- inode = fd->inode;
- }
+ if (fd) {
+ local->fd = fd_ref(fd);
+ if (!inode)
+ inode = fd->inode;
+ }
- local->op_ret = -1;
- local->op_errno = EUCLEAN;
- local->fop = fop;
+ local->op_ret = -1;
+ local->op_errno = EUCLEAN;
+ local->fop = fop;
- if (inode) {
- local->layout = dht_layout_get (frame->this, inode);
- local->cached_subvol = dht_subvol_get_cached (frame->this,
- inode);
- }
+ if (inode) {
+ local->layout = dht_layout_get(frame->this, inode);
+ local->cached_subvol = dht_subvol_get_cached(frame->this, inode);
+ }
- frame->local = local;
+ frame->local = local;
out:
- if (ret) {
- if (local)
- mem_put (local);
- local = NULL;
- }
- return local;
+ if (ret) {
+ if (local)
+ mem_put(local);
+ local = NULL;
+ }
+ return local;
}
xlator_t *
-dht_first_up_subvol (xlator_t *this)
+dht_first_up_subvol(xlator_t *this)
{
- dht_conf_t *conf = NULL;
- xlator_t *child = NULL;
- int i = 0;
- time_t time = 0;
+ dht_conf_t *conf = NULL;
+ xlator_t *child = NULL;
+ int i = 0;
+ time_t time = 0;
- conf = this->private;
- if (!conf)
- goto out;
+ conf = this->private;
+ if (!conf)
+ goto out;
- LOCK (&conf->subvolume_lock);
- {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (conf->subvol_up_time[i]) {
- if (!time) {
- time = conf->subvol_up_time[i];
- child = conf->subvolumes[i];
- } else if (time > conf->subvol_up_time[i]) {
- time = conf->subvol_up_time[i];
- child = conf->subvolumes[i];
- }
- }
+ LOCK(&conf->subvolume_lock);
+ {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvol_up_time[i]) {
+ if (!time) {
+ time = conf->subvol_up_time[i];
+ child = conf->subvolumes[i];
+ } else if (time > conf->subvol_up_time[i]) {
+ time = conf->subvol_up_time[i];
+ child = conf->subvolumes[i];
}
+ }
}
- UNLOCK (&conf->subvolume_lock);
+ }
+ UNLOCK(&conf->subvolume_lock);
out:
- return child;
+ return child;
}
xlator_t *
-dht_last_up_subvol (xlator_t *this)
+dht_last_up_subvol(xlator_t *this)
{
- dht_conf_t *conf = NULL;
- xlator_t *child = NULL;
- int i = 0;
+ dht_conf_t *conf = NULL;
+ xlator_t *child = NULL;
+ int i = 0;
- conf = this->private;
- if (!conf)
- goto out;
+ conf = this->private;
+ if (!conf)
+ goto out;
- LOCK (&conf->subvolume_lock);
- {
- for (i = conf->subvolume_cnt-1; i >= 0; i--) {
- if (conf->subvolume_status[i]) {
- child = conf->subvolumes[i];
- break;
- }
- }
+ LOCK(&conf->subvolume_lock);
+ {
+ for (i = conf->subvolume_cnt - 1; i >= 0; i--) {
+ if (conf->subvolume_status[i]) {
+ child = conf->subvolumes[i];
+ break;
+ }
}
- UNLOCK (&conf->subvolume_lock);
+ }
+ UNLOCK(&conf->subvolume_lock);
out:
- return child;
+ return child;
}
xlator_t *
-dht_subvol_get_hashed (xlator_t *this, loc_t *loc)
+dht_subvol_get_hashed(xlator_t *this, loc_t *loc)
{
- dht_layout_t *layout = NULL;
- xlator_t *subvol = NULL;
- dht_conf_t *conf = NULL;
- dht_methods_t *methods = NULL;
+ dht_layout_t *layout = NULL;
+ xlator_t *subvol = NULL;
+ dht_conf_t *conf = NULL;
+ dht_methods_t *methods = NULL;
- GF_VALIDATE_OR_GOTO ("dht", this, out);
- GF_VALIDATE_OR_GOTO (this->name, loc, out);
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc, out);
- conf = this->private;
- GF_VALIDATE_OR_GOTO (this->name, conf, out);
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, conf, out);
- methods = conf->methods;
- GF_VALIDATE_OR_GOTO (this->name, conf->methods, out);
+ methods = &(conf->methods);
- if (__is_root_gfid (loc->gfid)) {
- subvol = dht_first_up_subvol (this);
- goto out;
- }
+ if (__is_root_gfid(loc->gfid)) {
+ subvol = dht_first_up_subvol(this);
+ goto out;
+ }
- GF_VALIDATE_OR_GOTO (this->name, loc->parent, out);
- GF_VALIDATE_OR_GOTO (this->name, loc->name, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc->parent, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc->name, out);
- layout = dht_layout_get (this, loc->parent);
+ layout = dht_layout_get(this, loc->parent);
- if (!layout) {
- gf_msg_debug (this->name, 0,
- "Missing layout. path=%s, parent gfid =%s",
- loc->path, uuid_utoa (loc->parent->gfid));
- goto out;
- }
+ if (!layout) {
+ gf_msg_debug(this->name, 0, "Missing layout. path=%s, parent gfid =%s",
+ loc->path, uuid_utoa(loc->parent->gfid));
+ goto out;
+ }
- subvol = methods->layout_search (this, layout, loc->name);
+ subvol = methods->layout_search(this, layout, loc->name);
- if (!subvol) {
- gf_msg_debug (this->name, 0,
- "No hashed subvolume for path=%s",
- loc->path);
- goto out;
- }
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "No hashed subvolume for path=%s",
+ loc->path);
+ goto out;
+ }
out:
- if (layout) {
- dht_layout_unref (this, layout);
- }
+ if (layout) {
+ dht_layout_unref(this, layout);
+ }
- return subvol;
+ return subvol;
}
-
xlator_t *
-dht_subvol_get_cached (xlator_t *this, inode_t *inode)
+dht_subvol_get_cached(xlator_t *this, inode_t *inode)
{
- dht_layout_t *layout = NULL;
- xlator_t *subvol = NULL;
+ dht_layout_t *layout = NULL;
+ xlator_t *subvol = NULL;
- GF_VALIDATE_OR_GOTO (this->name, this, out);
- GF_VALIDATE_OR_GOTO (this->name, inode, out);
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
- layout = dht_layout_get (this, inode);
+ layout = dht_layout_get(this, inode);
- if (!layout) {
- goto out;
- }
+ if (!layout) {
+ goto out;
+ }
- subvol = layout->list[0].xlator;
+ subvol = layout->list[0].xlator;
out:
- if (layout) {
- dht_layout_unref (this, layout);
- }
+ if (layout) {
+ dht_layout_unref(this, layout);
+ }
- return subvol;
+ return subvol;
}
-
xlator_t *
-dht_subvol_next (xlator_t *this, xlator_t *prev)
+dht_subvol_next(xlator_t *this, xlator_t *prev)
{
- dht_conf_t *conf = NULL;
- int i = 0;
- xlator_t *next = NULL;
+ dht_conf_t *conf = NULL;
+ int i = 0;
+ xlator_t *next = NULL;
- conf = this->private;
- if (!conf)
- goto out;
+ conf = this->private;
+ if (!conf)
+ goto out;
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (conf->subvolumes[i] == prev) {
- if ((i + 1) < conf->subvolume_cnt)
- next = conf->subvolumes[i + 1];
- break;
- }
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvolumes[i] == prev) {
+ if ((i + 1) < conf->subvolume_cnt)
+ next = conf->subvolumes[i + 1];
+ break;
}
+ }
out:
- return next;
+ return next;
}
/* This func wraps around, if prev is actually the last subvol.
*/
xlator_t *
-dht_subvol_next_available (xlator_t *this, xlator_t *prev)
+dht_subvol_next_available(xlator_t *this, xlator_t *prev)
{
- dht_conf_t *conf = NULL;
- int i = 0;
- xlator_t *next = NULL;
-
- conf = this->private;
- if (!conf)
- goto out;
-
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (conf->subvolumes[i] == prev) {
- /* if prev is last in conf->subvolumes, then wrap
- * around.
- */
- if ((i + 1) < conf->subvolume_cnt) {
- next = conf->subvolumes[i + 1];
- } else {
- next = conf->subvolumes[0];
- }
- break;
- }
- }
+ dht_conf_t *conf = NULL;
+ int i = 0;
+ xlator_t *next = NULL;
+
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvolumes[i] == prev) {
+ /* if prev is last in conf->subvolumes, then wrap
+ * around.
+ */
+ if ((i + 1) < conf->subvolume_cnt) {
+ next = conf->subvolumes[i + 1];
+ } else {
+ next = conf->subvolumes[0];
+ }
+ break;
+ }
+ }
out:
- return next;
+ return next;
}
int
-dht_subvol_cnt (xlator_t *this, xlator_t *subvol)
+dht_subvol_cnt(xlator_t *this, xlator_t *subvol)
{
- int i = 0;
- int ret = -1;
- dht_conf_t *conf = NULL;
+ int i = 0;
+ int ret = -1;
+ dht_conf_t *conf = NULL;
- conf = this->private;
- if (!conf)
- goto out;
+ conf = this->private;
+ if (!conf)
+ goto out;
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (subvol == conf->subvolumes[i]) {
- ret = i;
- break;
- }
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (subvol == conf->subvolumes[i]) {
+ ret = i;
+ break;
}
+ }
out:
- return ret;
+ return ret;
}
+#define set_if_greater(a, b) \
+ do { \
+ if ((a) < (b)) \
+ (a) = (b); \
+ } while (0)
-#define set_if_greater(a, b) do { \
- if ((a) < (b)) \
- (a) = (b); \
- } while (0)
+#define set_if_greater_time(a, an, b, bn) \
+ do { \
+ if (((a) < (b)) || (((a) == (b)) && ((an) < (bn)))) { \
+ (a) = (b); \
+ (an) = (bn); \
+ } \
+ } while (0)
+int
+dht_iatt_merge(xlator_t *this, struct iatt *to, struct iatt *from)
+{
+ if (!from || !to)
+ return 0;
-#define set_if_greater_time(a, an, b, bn) do { \
- if (((a) < (b)) || (((a) == (b)) && ((an) < (bn)))){ \
- (a) = (b); \
- (an) = (bn); \
- } \
- } while (0) \
-
+ to->ia_dev = from->ia_dev;
+
+ gf_uuid_copy(to->ia_gfid, from->ia_gfid);
+
+ to->ia_ino = from->ia_ino;
+ to->ia_prot = from->ia_prot;
+ to->ia_type = from->ia_type;
+ to->ia_nlink = from->ia_nlink;
+ to->ia_rdev = from->ia_rdev;
+ to->ia_size += from->ia_size;
+ to->ia_blksize = from->ia_blksize;
+ to->ia_blocks += from->ia_blocks;
+
+ if (IA_ISDIR(from->ia_type)) {
+ to->ia_blocks = DHT_DIR_STAT_BLOCKS;
+ to->ia_size = DHT_DIR_STAT_SIZE;
+ }
+ set_if_greater(to->ia_uid, from->ia_uid);
+ set_if_greater(to->ia_gid, from->ia_gid);
+
+ set_if_greater_time(to->ia_atime, to->ia_atime_nsec, from->ia_atime,
+ from->ia_atime_nsec);
+ set_if_greater_time(to->ia_mtime, to->ia_mtime_nsec, from->ia_mtime,
+ from->ia_mtime_nsec);
+ set_if_greater_time(to->ia_ctime, to->ia_ctime_nsec, from->ia_ctime,
+ from->ia_ctime_nsec);
+
+ return 0;
+}
int
-dht_iatt_merge (xlator_t *this, struct iatt *to,
- struct iatt *from, xlator_t *subvol)
+dht_build_child_loc(xlator_t *this, loc_t *child, loc_t *parent, char *name)
{
- if (!from || !to)
- return 0;
+ if (!child) {
+ goto err;
+ }
- to->ia_dev = from->ia_dev;
+ if (strcmp(parent->path, "/") == 0)
+ gf_asprintf((char **)&child->path, "/%s", name);
+ else
+ gf_asprintf((char **)&child->path, "%s/%s", parent->path, name);
- gf_uuid_copy (to->ia_gfid, from->ia_gfid);
+ if (!child->path) {
+ goto err;
+ }
- to->ia_ino = from->ia_ino;
- to->ia_prot = from->ia_prot;
- to->ia_type = from->ia_type;
- to->ia_nlink = from->ia_nlink;
- to->ia_rdev = from->ia_rdev;
- to->ia_size += from->ia_size;
- to->ia_blksize = from->ia_blksize;
- to->ia_blocks += from->ia_blocks;
+ child->name = strrchr(child->path, '/');
+ if (child->name)
+ child->name++;
- set_if_greater (to->ia_uid, from->ia_uid);
- set_if_greater (to->ia_gid, from->ia_gid);
+ child->parent = inode_ref(parent->inode);
+ child->inode = inode_new(parent->inode->table);
- set_if_greater_time(to->ia_atime, to->ia_atime_nsec,
- from->ia_atime, from->ia_atime_nsec);
- set_if_greater_time (to->ia_mtime, to->ia_mtime_nsec,
- from->ia_mtime, from->ia_mtime_nsec);
- set_if_greater_time (to->ia_ctime, to->ia_ctime_nsec,
- from->ia_ctime, from->ia_ctime_nsec);
+ if (!child->inode) {
+ goto err;
+ }
- return 0;
+ return 0;
+err:
+ if (child) {
+ loc_wipe(child);
+ }
+ return -1;
}
int
-dht_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name)
+dht_init_local_subvolumes(xlator_t *this, dht_conf_t *conf)
{
- if (!child) {
- goto err;
- }
+ xlator_list_t *subvols = NULL;
+ int cnt = 0;
- if (strcmp (parent->path, "/") == 0)
- gf_asprintf ((char **)&child->path, "/%s", name);
- else
- gf_asprintf ((char **)&child->path, "%s/%s", parent->path, name);
-
- if (!child->path) {
- goto err;
- }
+ if (!conf)
+ return -1;
- child->name = strrchr (child->path, '/');
- if (child->name)
- child->name++;
+ for (subvols = this->children; subvols; subvols = subvols->next)
+ cnt++;
- child->parent = inode_ref (parent->inode);
- child->inode = inode_new (parent->inode->table);
+ conf->local_subvols = GF_CALLOC(cnt, sizeof(xlator_t *),
+ gf_dht_mt_xlator_t);
- if (!child->inode) {
- goto err;
- }
+ /* FIX FIX : do this dynamically*/
+ conf->local_nodeuuids = GF_CALLOC(cnt, sizeof(subvol_nodeuuids_info_t),
+ gf_dht_nodeuuids_t);
- return 0;
-err:
- loc_wipe (child);
+ if (!conf->local_subvols || !conf->local_nodeuuids) {
return -1;
-}
+ }
+ conf->local_subvols_cnt = 0;
+ return 0;
+}
int
-dht_init_subvolumes (xlator_t *this, dht_conf_t *conf)
+dht_init_subvolumes(xlator_t *this, dht_conf_t *conf)
{
- xlator_list_t *subvols = NULL;
- int cnt = 0;
+ xlator_list_t *subvols = NULL;
+ int cnt = 0;
- if (!conf)
- return -1;
+ if (!conf)
+ return -1;
- for (subvols = this->children; subvols; subvols = subvols->next)
- cnt++;
+ for (subvols = this->children; subvols; subvols = subvols->next)
+ cnt++;
- conf->subvolumes = GF_CALLOC (cnt, sizeof (xlator_t *),
- gf_dht_mt_xlator_t);
- if (!conf->subvolumes) {
- return -1;
- }
- conf->subvolume_cnt = cnt;
+ conf->subvolumes = GF_CALLOC(cnt, sizeof(xlator_t *), gf_dht_mt_xlator_t);
+ if (!conf->subvolumes) {
+ return -1;
+ }
+ conf->subvolume_cnt = cnt;
+ /* Doesn't make sense to do any dht layer tasks
+ if the subvol count is 1. Set it as pass_through */
+ if (cnt == 1)
+ this->pass_through = _gf_true;
- dht_set_subvol_range(this);
+ conf->local_subvols_cnt = 0;
- cnt = 0;
- for (subvols = this->children; subvols; subvols = subvols->next)
- conf->subvolumes[cnt++] = subvols->xlator;
+ dht_set_subvol_range(this);
- conf->subvolume_status = GF_CALLOC (cnt, sizeof (char),
- gf_dht_mt_char);
- if (!conf->subvolume_status) {
- return -1;
- }
+ cnt = 0;
+ for (subvols = this->children; subvols; subvols = subvols->next)
+ conf->subvolumes[cnt++] = subvols->xlator;
- conf->last_event = GF_CALLOC (cnt, sizeof (int),
- gf_dht_mt_char);
- if (!conf->last_event) {
- return -1;
- }
-
- conf->subvol_up_time = GF_CALLOC (cnt, sizeof (time_t),
- gf_dht_mt_subvol_time);
- if (!conf->subvol_up_time) {
- return -1;
- }
+ conf->subvolume_status = GF_CALLOC(cnt, sizeof(char), gf_dht_mt_char);
+ if (!conf->subvolume_status) {
+ return -1;
+ }
- conf->du_stats = GF_CALLOC (conf->subvolume_cnt, sizeof (dht_du_t),
- gf_dht_mt_dht_du_t);
- if (!conf->du_stats) {
- return -1;
- }
+ conf->last_event = GF_CALLOC(cnt, sizeof(int), gf_dht_mt_char);
+ if (!conf->last_event) {
+ return -1;
+ }
- conf->decommissioned_bricks = GF_CALLOC (cnt, sizeof (xlator_t *),
- gf_dht_mt_xlator_t);
- if (!conf->decommissioned_bricks) {
- return -1;
- }
+ conf->subvol_up_time = GF_CALLOC(cnt, sizeof(time_t),
+ gf_dht_mt_subvol_time);
+ if (!conf->subvol_up_time) {
+ return -1;
+ }
- return 0;
-}
+ conf->du_stats = GF_CALLOC(conf->subvolume_cnt, sizeof(dht_du_t),
+ gf_dht_mt_dht_du_t);
+ if (!conf->du_stats) {
+ return -1;
+ }
+ conf->decommissioned_bricks = GF_CALLOC(cnt, sizeof(xlator_t *),
+ gf_dht_mt_xlator_t);
+ if (!conf->decommissioned_bricks) {
+ return -1;
+ }
+ return 0;
+}
+/*
+ op_ret values :
+ 0 : Success.
+ -1 : Failure.
+ 1 : File is being migrated but not by this DHT layer.
+*/
static int
-dht_migration_complete_check_done (int op_ret, call_frame_t *frame, void *data)
+dht_migration_complete_check_done(int op_ret, call_frame_t *frame, void *data)
{
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
- local = frame->local;
+ local = frame->local;
- local->rebalance.target_op_fn (THIS, frame, op_ret);
+ if (op_ret != 0)
+ goto out;
- return 0;
-}
+ if (local->cached_subvol == NULL) {
+ local->op_errno = EINVAL;
+ goto out;
+ }
+ subvol = local->cached_subvol;
-int
-dht_migration_complete_check_task (void *data)
-{
- int ret = -1;
- xlator_t *src_node = NULL;
- xlator_t *dst_node = NULL;
- dht_local_t *local = NULL;
- dict_t *dict = NULL;
- dht_layout_t *layout = NULL;
- struct iatt stbuf = {0,};
- xlator_t *this = NULL;
- call_frame_t *frame = NULL;
- loc_t tmp_loc = {0,};
- char *path = NULL;
- dht_conf_t *conf = NULL;
- inode_t *inode = NULL;
- fd_t *iter_fd = NULL;
- uint64_t tmp_subvol = 0;
- int open_failed = 0;
-
- this = THIS;
- frame = data;
- local = frame->local;
- conf = this->private;
-
- src_node = local->cached_subvol;
-
- if (!local->loc.inode && !local->fd) {
- local->op_errno = EINVAL;
- goto out;
- }
-
- inode = (!local->fd) ? local->loc.inode : local->fd->inode;
-
- /* getxattr on cached_subvol for 'linkto' value. Do path based getxattr
- * as root:root. If a fd is already open, access check wont be done*/
+out:
+ local->rebalance.target_op_fn(THIS, subvol, frame, op_ret);
- if (!local->loc.inode) {
- ret = syncop_fgetxattr (src_node, local->fd, &dict,
- conf->link_xattr_name, NULL, NULL);
- } else {
- SYNCTASK_SETID (0, 0);
- ret = syncop_getxattr (src_node, &local->loc, &dict,
- conf->link_xattr_name, NULL, NULL);
- SYNCTASK_SETID (frame->root->uid, frame->root->gid);
- }
+ return 0;
+}
- /*
- * temporary check related to tier promoting/demoting the file;
- * the lower level DHT detects the migration (due to sticky
- * bits) when it is the responsibility of the tier translator
- * to complete the rebalance transaction. It will be corrected
- * when rebalance and tier migration are fixed to work together.
+int
+dht_migration_complete_check_task(void *data)
+{
+ int ret = -1;
+ xlator_t *src_node = NULL;
+ xlator_t *dst_node = NULL, *linkto_target = NULL;
+ dht_local_t *local = NULL;
+ dict_t *dict = NULL;
+ struct iatt stbuf = {
+ 0,
+ };
+ xlator_t *this = NULL;
+ call_frame_t *frame = NULL;
+ loc_t tmp_loc = {
+ 0,
+ };
+ char *path = NULL;
+ dht_conf_t *conf = NULL;
+ inode_t *inode = NULL;
+ fd_t *iter_fd = NULL;
+ fd_t *tmp = NULL;
+ uint64_t tmp_miginfo = 0;
+ dht_migrate_info_t *miginfo = NULL;
+ gf_boolean_t skip_open = _gf_false;
+ int open_failed = 0;
+
+ this = THIS;
+ frame = data;
+ local = frame->local;
+ conf = this->private;
+
+ src_node = local->cached_subvol;
+
+ if (!local->loc.inode && !local->fd) {
+ local->op_errno = EINVAL;
+ goto out;
+ }
+
+ inode = (!local->fd) ? local->loc.inode : local->fd->inode;
+
+ /* getxattr on cached_subvol for 'linkto' value. Do path based getxattr
+ * as root:root. If a fd is already open, access check won't be done*/
+
+ if (!local->loc.inode) {
+ ret = syncop_fgetxattr(src_node, local->fd, &dict,
+ conf->link_xattr_name, NULL, NULL);
+ } else {
+ SYNCTASK_SETID(0, 0);
+ ret = syncop_getxattr(src_node, &local->loc, &dict,
+ conf->link_xattr_name, NULL, NULL);
+ SYNCTASK_SETID(frame->root->uid, frame->root->gid);
+ }
+
+ /*
+ * Each DHT xlator layer has its own name for the linkto xattr.
+ * If the file mode bits indicate the the file is being migrated but
+ * this layer's linkto xattr is not set, it means that another
+ * DHT layer is migrating the file. In this case, return 1 so
+ * the mode bits can be passed on to the higher layer for appropriate
+ * action.
+ */
+ if (-ret == ENODATA) {
+ /* This DHT translator is not migrating this file */
+
+ ret = inode_ctx_reset1(inode, this, &tmp_miginfo);
+ if (tmp_miginfo) {
+ /* This can be a problem if the file was
+ * migrated by two different layers. Raise
+ * a warning here.
+ */
+ gf_smsg(
+ this->name, GF_LOG_WARNING, 0, DHT_MSG_HAS_MIGINFO, "tmp=%s",
+ tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid), NULL);
+
+ miginfo = (void *)(uintptr_t)tmp_miginfo;
+ GF_REF_PUT(miginfo);
+ }
+ ret = 1;
+ goto out;
+ }
+
+ if (!ret)
+ linkto_target = dht_linkfile_subvol(this, NULL, NULL, dict);
+
+ if (local->loc.inode) {
+ loc_copy(&tmp_loc, &local->loc);
+ } else {
+ tmp_loc.inode = inode_ref(inode);
+ gf_uuid_copy(tmp_loc.gfid, inode->gfid);
+ }
+
+ ret = syncop_lookup(this, &tmp_loc, &stbuf, 0, 0, 0);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_FILE_LOOKUP_FAILED,
+ "tmp=%s", tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid),
+ "name=%s", this->name, NULL);
+ local->op_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+
+ dst_node = dht_subvol_get_cached(this, tmp_loc.inode);
+ if (linkto_target && dst_node != linkto_target) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_INVALID_LINKFILE,
+ "linkto_target_name=%s", linkto_target->name, "dst_name=%s",
+ dst_node->name, NULL);
+ }
+
+ if (gf_uuid_compare(stbuf.ia_gfid, tmp_loc.inode->gfid)) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_GFID_MISMATCH, "tmp=%s",
+ tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid),
+ "dst_name=%s", dst_node->name, NULL);
+ ret = -1;
+ local->op_errno = EIO;
+ goto out;
+ }
+
+ /* update local. A layout is set in inode-ctx in lookup already */
+
+ dht_layout_unref(this, local->layout);
+
+ local->layout = dht_layout_get(frame->this, inode);
+ local->cached_subvol = dst_node;
+
+ ret = 0;
+
+ /* once we detect the migration complete, the inode-ctx2 is no more
+ required.. delete the ctx and also, it means, open() already
+ done on all the fd of inode */
+ ret = inode_ctx_reset1(inode, this, &tmp_miginfo);
+ if (tmp_miginfo) {
+ miginfo = (void *)(uintptr_t)tmp_miginfo;
+ GF_REF_PUT(miginfo);
+ goto out;
+ }
+
+ /* perform 'open()' on all the fd's present on the inode */
+ if (tmp_loc.path == NULL) {
+ inode_path(inode, NULL, &path);
+ if (path)
+ tmp_loc.path = path;
+ }
+
+ LOCK(&inode->lock);
+
+ if (list_empty(&inode->fd_list))
+ goto unlock;
+
+ /* perform open as root:root. There is window between linkfile
+ * creation(root:root) and setattr with the correct uid/gid
+ */
+ SYNCTASK_SETID(0, 0);
+
+ /* It's possible that we are the last user of iter_fd after each
+ * iteration. In this case the fd_unref() of iter_fd at the end of
+ * the loop will cause the destruction of the fd. So we need to
+ * iterate the list safely because iter_fd cannot be trusted.
+ */
+ iter_fd = list_entry((&inode->fd_list)->next, typeof(*iter_fd), inode_list);
+ while (&iter_fd->inode_list != (&inode->fd_list)) {
+ if (fd_is_anonymous(iter_fd) ||
+ (dht_fd_open_on_dst(this, iter_fd, dst_node))) {
+ if (!tmp) {
+ iter_fd = list_entry(iter_fd->inode_list.next, typeof(*iter_fd),
+ inode_list);
+ continue;
+ }
+ skip_open = _gf_true;
+ }
+ /* We need to release the inode->lock before calling
+ * syncop_open() to avoid possible deadlocks. However this
+ * can cause the iter_fd to be released by other threads.
+ * To avoid this, we take a reference before releasing the
+ * lock.
*/
- if (strcmp(this->parents->xlator->type, "cluster/tier") == 0) {
- ret = 0;
- goto out;
- }
-
- if (!ret)
- dst_node = dht_linkfile_subvol (this, NULL, NULL, dict);
-
- if (ret) {
- if (!dht_inode_missing(-ret) || (!local->loc.inode)) {
- local->op_errno = -ret;
- gf_log (this->name, GF_LOG_ERROR,
- "%s: failed to get the 'linkto' xattr %s",
- local->loc.path, strerror (-ret));
- ret = -1;
- goto out;
- }
-
- /* Need to do lookup on hashed subvol, then get the file */
- ret = syncop_lookup (this, &local->loc, &stbuf, NULL,
- NULL, NULL);
- if (ret) {
- local->op_errno = -ret;
- ret = -1;
- goto out;
- }
-
- dst_node = dht_subvol_get_cached (this, local->loc.inode);
- }
-
- if (!dst_node) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: failed to get the destination node",
- local->loc.path);
- ret = -1;
- local->op_errno = EINVAL;
- goto out;
- }
-
- /* lookup on dst */
- if (local->loc.inode) {
- ret = syncop_lookup (dst_node, &local->loc, &stbuf, NULL,
- NULL, NULL);
-
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: failed to lookup the file on %s",
- local->loc.path, dst_node->name);
- local->op_errno = -ret;
- ret = -1;
- goto out;
- }
+ fd_ref(iter_fd);
- if (gf_uuid_compare (stbuf.ia_gfid, local->loc.inode->gfid)) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_GFID_MISMATCH,
- "%s: gfid different on the target file on %s",
- local->loc.path, dst_node->name);
- ret = -1;
- local->op_errno = EIO;
- goto out;
- }
- }
-
- /* update inode ctx (the layout) */
- dht_layout_unref (this, local->layout);
-
- ret = dht_layout_preset (this, dst_node, inode);
- if (ret != 0) {
- gf_msg_debug (this->name, 0,
- "%s: could not set preset layout "
- "for subvol %s", local->loc.path,
- dst_node->name);
- ret = -1;
- local->op_errno = EINVAL;
- goto out;
- }
+ UNLOCK(&inode->lock);
- layout = dht_layout_for_subvol (this, dst_node);
- if (!layout) {
- gf_log (this->name, GF_LOG_INFO,
- "%s: no pre-set layout for subvolume %s",
- local->loc.path, dst_node ? dst_node->name : "<nil>");
- ret = -1;
- local->op_errno = EINVAL;
- goto out;
+ if (tmp) {
+ fd_unref(tmp);
+ tmp = NULL;
}
+ if (skip_open)
+ goto next;
- ret = dht_layout_set (this, inode, layout);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: failed to set the new layout",
- local->loc.path);
- local->op_errno = EINVAL;
- goto out;
+ /* flags for open are stripped down to allow following the
+ * new location of the file, otherwise we can get EEXIST or
+ * truncate the file again as rebalance is moving the data */
+ ret = syncop_open(dst_node, &tmp_loc,
+ (iter_fd->flags & ~(O_CREAT | O_EXCL | O_TRUNC)),
+ iter_fd, NULL, NULL);
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_OPEN_FD_ON_DST_FAILED, "id=%p", iter_fd,
+ "flags=0%o", iter_fd->flags, "path=%s", path, "name=%s",
+ dst_node->name, NULL);
+
+ open_failed = 1;
+ local->op_errno = -ret;
+ ret = -1;
+ } else {
+ dht_fd_ctx_set(this, iter_fd, dst_node);
}
- local->cached_subvol = dst_node;
- ret = 0;
+ next:
+ LOCK(&inode->lock);
+ skip_open = _gf_false;
+ tmp = iter_fd;
+ iter_fd = list_entry(tmp->inode_list.next, typeof(*tmp), inode_list);
+ }
- /* once we detect the migration complete, the inode-ctx2 is no more
- required.. delete the ctx and also, it means, open() already
- done on all the fd of inode */
- ret = inode_ctx_reset1 (inode, this, &tmp_subvol);
- if (tmp_subvol)
- goto out;
+ SYNCTASK_SETID(frame->root->uid, frame->root->gid);
- if (list_empty (&inode->fd_list))
- goto out;
+ if (open_failed) {
+ ret = -1;
+ goto unlock;
+ }
+ ret = 0;
- /* perform open as root:root. There is window between linkfile
- * creation(root:root) and setattr with the correct uid/gid
- */
- SYNCTASK_SETID(0, 0);
+unlock:
+ UNLOCK(&inode->lock);
+ if (tmp) {
+ fd_unref(tmp);
+ tmp = NULL;
+ }
- /* perform 'open()' on all the fd's present on the inode */
- tmp_loc.inode = inode;
- inode_path (inode, NULL, &path);
- if (path)
- tmp_loc.path = path;
- list_for_each_entry (iter_fd, &inode->fd_list, inode_list) {
- if (fd_is_anonymous (iter_fd))
- continue;
-
- /* flags for open are stripped down to allow following the
- * new location of the file, otherwise we can get EEXIST or
- * truncate the file again as rebalance is moving the data */
- ret = syncop_open (dst_node, &tmp_loc,
- (iter_fd->flags &
- ~(O_CREAT | O_EXCL | O_TRUNC)), iter_fd,
- NULL, NULL);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR, "failed to open "
- "the fd (%p, flags=0%o) on file %s @ %s",
- iter_fd, iter_fd->flags, path, dst_node->name);
- open_failed = 1;
- local->op_errno = -ret;
- ret = -1;
- }
- }
- GF_FREE (path);
-
- SYNCTASK_SETID (frame->root->uid, frame->root->gid);
-
- if (open_failed) {
- ret = -1;
- goto out;
- }
- ret = 0;
out:
+ if (dict) {
+ dict_unref(dict);
+ }
- return ret;
+ loc_wipe(&tmp_loc);
+
+ return ret;
}
int
-dht_rebalance_complete_check (xlator_t *this, call_frame_t *frame)
+dht_rebalance_complete_check(xlator_t *this, call_frame_t *frame)
{
- int ret = -1;
+ int ret = -1;
- ret = synctask_new (this->ctx->env, dht_migration_complete_check_task,
- dht_migration_complete_check_done,
- frame, frame);
- return ret;
+ ret = synctask_new(this->ctx->env, dht_migration_complete_check_task,
+ dht_migration_complete_check_done, frame, frame);
+ return ret;
}
/* During 'in-progress' state, both nodes should have the file */
+/*
+ op_ret values :
+ 0 : Success
+ -1 : Failure.
+ 1 : File is being migrated but not by this DHT layer.
+*/
static int
-dht_inprogress_check_done (int op_ret, call_frame_t *sync_frame, void *data)
-{
- dht_local_t *local = NULL;
-
- local = sync_frame->local;
-
- local->rebalance.target_op_fn (THIS, sync_frame, op_ret);
-
- return 0;
-}
-
-static int
-dht_rebalance_inprogress_task (void *data)
+dht_inprogress_check_done(int op_ret, call_frame_t *frame, void *data)
{
- int ret = -1;
- xlator_t *src_node = NULL;
- xlator_t *dst_node = NULL;
- dht_local_t *local = NULL;
- dict_t *dict = NULL;
- call_frame_t *frame = NULL;
- xlator_t *this = NULL;
- char *path = NULL;
- struct iatt stbuf = {0,};
- loc_t tmp_loc = {0,};
- dht_conf_t *conf = NULL;
- inode_t *inode = NULL;
- fd_t *iter_fd = NULL;
- int open_failed = 0;
-
- this = THIS;
- frame = data;
- local = frame->local;
- conf = this->private;
-
- src_node = local->cached_subvol;
-
- if (!local->loc.inode && !local->fd)
- goto out;
+ dht_local_t *local = NULL;
+ xlator_t *dst_subvol = NULL, *src_subvol = NULL;
+ inode_t *inode = NULL;
- inode = (!local->fd) ? local->loc.inode : local->fd->inode;
+ local = frame->local;
- /* getxattr on cached_subvol for 'linkto' value. Do path based getxattr
- * as root:root. If a fd is already open, access check wont be done*/
- if (local->loc.inode) {
- SYNCTASK_SETID (0, 0);
- ret = syncop_getxattr (src_node, &local->loc, &dict,
- conf->link_xattr_name, NULL, NULL);
- SYNCTASK_SETID (frame->root->uid, frame->root->gid);
- } else {
- ret = syncop_fgetxattr (src_node, local->fd, &dict,
- conf->link_xattr_name, NULL, NULL);
- }
-
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: failed to get the 'linkto' xattr %s",
- local->loc.path, strerror (-ret));
- ret = -1;
- goto out;
- }
-
- dst_node = dht_linkfile_subvol (this, NULL, NULL, dict);
- if (!dst_node) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: failed to get the 'linkto' xattr from dict",
- local->loc.path);
- ret = -1;
- goto out;
- }
+ if (op_ret != 0)
+ goto out;
- local->rebalance.target_node = dst_node;
-
- if (local->loc.inode) {
- /* lookup on dst */
- ret = syncop_lookup (dst_node, &local->loc, &stbuf, NULL,
- NULL, NULL);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: failed to lookup the file on %s",
- local->loc.path, dst_node->name);
- ret = -1;
- goto out;
- }
+ inode = local->loc.inode ? local->loc.inode : local->fd->inode;
- if (gf_uuid_compare (stbuf.ia_gfid, local->loc.inode->gfid)) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_GFID_MISMATCH,
- "%s: gfid different on the target file on %s",
- local->loc.path, dst_node->name);
- ret = -1;
- goto out;
- }
+ dht_inode_ctx_get_mig_info(THIS, inode, &src_subvol, &dst_subvol);
+ if (dht_mig_info_is_invalid(local->cached_subvol, src_subvol, dst_subvol)) {
+ dst_subvol = dht_subvol_get_cached(THIS, inode);
+ if (!dst_subvol) {
+ local->op_errno = EINVAL;
+ goto out;
}
+ }
- ret = 0;
+out:
+ local->rebalance.target_op_fn(THIS, dst_subvol, frame, op_ret);
- if (list_empty (&inode->fd_list))
- goto done;
+ return 0;
+}
- /* perform open as root:root. There is window between linkfile
- * creation(root:root) and setattr with the correct uid/gid
+static int
+dht_rebalance_inprogress_task(void *data)
+{
+ int ret = -1;
+ xlator_t *src_node = NULL;
+ xlator_t *dst_node = NULL;
+ dht_local_t *local = NULL;
+ dict_t *dict = NULL;
+ call_frame_t *frame = NULL;
+ xlator_t *this = NULL;
+ char *path = NULL;
+ struct iatt stbuf = {
+ 0,
+ };
+ loc_t tmp_loc = {
+ 0,
+ };
+ dht_conf_t *conf = NULL;
+ inode_t *inode = NULL;
+ fd_t *iter_fd = NULL;
+ fd_t *tmp = NULL;
+ int open_failed = 0;
+ uint64_t tmp_miginfo = 0;
+ dht_migrate_info_t *miginfo = NULL;
+ gf_boolean_t skip_open = _gf_false;
+
+ this = THIS;
+ frame = data;
+ local = frame->local;
+ conf = this->private;
+
+ src_node = local->cached_subvol;
+
+ if (!local->loc.inode && !local->fd)
+ goto out;
+
+ inode = (!local->fd) ? local->loc.inode : local->fd->inode;
+
+ /* getxattr on cached_subvol for 'linkto' value. Do path based getxattr
+ * as root:root. If a fd is already open, access check won't be done*/
+ if (local->loc.inode) {
+ SYNCTASK_SETID(0, 0);
+ ret = syncop_getxattr(src_node, &local->loc, &dict,
+ conf->link_xattr_name, NULL, NULL);
+ SYNCTASK_SETID(frame->root->uid, frame->root->gid);
+ } else {
+ ret = syncop_fgetxattr(src_node, local->fd, &dict,
+ conf->link_xattr_name, NULL, NULL);
+ }
+
+ /*
+ * Each DHT xlator layer has its own name for the linkto xattr.
+ * If the file mode bits indicate the the file is being migrated but
+ * this layer's linkto xattr is not present, it means that another
+ * DHT layer is migrating the file. In this case, return 1 so
+ * the mode bits can be passed on to the higher layer for appropriate
+ * action.
+ */
+
+ if (-ret == ENODATA) {
+ /* This DHT layer is not migrating this file */
+ ret = inode_ctx_reset1(inode, this, &tmp_miginfo);
+ if (tmp_miginfo) {
+ /* This can be a problem if the file was
+ * migrated by two different layers. Raise
+ * a warning here.
+ */
+ gf_smsg(
+ this->name, GF_LOG_WARNING, 0, DHT_MSG_HAS_MIGINFO, "tmp=%s",
+ tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid), NULL);
+ miginfo = (void *)(uintptr_t)tmp_miginfo;
+ GF_REF_PUT(miginfo);
+ }
+ ret = 1;
+ goto out;
+ }
+
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_GET_XATTR_FAILED,
+ "path=%s", local->loc.path, NULL);
+ ret = -1;
+ goto out;
+ }
+
+ dst_node = dht_linkfile_subvol(this, NULL, NULL, dict);
+ if (!dst_node) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_GET_XATTR_FAILED,
+ "path=%s", local->loc.path, NULL);
+ ret = -1;
+ goto out;
+ }
+
+ local->rebalance.target_node = dst_node;
+
+ if (local->loc.inode) {
+ loc_copy(&tmp_loc, &local->loc);
+ } else {
+ tmp_loc.inode = inode_ref(inode);
+ gf_uuid_copy(tmp_loc.gfid, inode->gfid);
+ }
+
+ /* lookup on dst */
+ ret = syncop_lookup(dst_node, &tmp_loc, &stbuf, NULL, NULL, NULL);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_FILE_LOOKUP_FAILED,
+ "tmp=%s", tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid),
+ "name=%s", dst_node->name, NULL);
+ ret = -1;
+ goto out;
+ }
+
+ if (gf_uuid_compare(stbuf.ia_gfid, tmp_loc.inode->gfid)) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_GFID_MISMATCH, "tmp=%s",
+ tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid),
+ "name=%s", dst_node->name, NULL);
+ ret = -1;
+ goto out;
+ }
+ ret = 0;
+
+ if (tmp_loc.path == NULL) {
+ inode_path(inode, NULL, &path);
+ if (path)
+ tmp_loc.path = path;
+ }
+
+ LOCK(&inode->lock);
+
+ if (list_empty(&inode->fd_list))
+ goto unlock;
+
+ /* perform open as root:root. There is window between linkfile
+ * creation(root:root) and setattr with the correct uid/gid
+ */
+ SYNCTASK_SETID(0, 0);
+
+ /* It's possible that we are the last user of iter_fd after each
+ * iteration. In this case the fd_unref() of iter_fd at the end of
+ * the loop will cause the destruction of the fd. So we need to
+ * iterate the list safely because iter_fd cannot be trusted.
+ */
+ iter_fd = list_entry((&inode->fd_list)->next, typeof(*iter_fd), inode_list);
+ while (&iter_fd->inode_list != (&inode->fd_list)) {
+ /* We need to release the inode->lock before calling
+ * syncop_open() to avoid possible deadlocks. However this
+ * can cause the iter_fd to be released by other threads.
+ * To avoid this, we take a reference before releasing the
+ * lock.
*/
- SYNCTASK_SETID (0, 0);
- tmp_loc.inode = inode;
- inode_path (inode, NULL, &path);
- if (path)
- tmp_loc.path = path;
-
- list_for_each_entry (iter_fd, &inode->fd_list, inode_list) {
- if (fd_is_anonymous (iter_fd))
- continue;
-
- /* flags for open are stripped down to allow following the
- * new location of the file, otherwise we can get EEXIST or
- * truncate the file again as rebalance is moving the data */
- ret = syncop_open (dst_node, &tmp_loc,
- (iter_fd->flags &
- ~(O_CREAT | O_EXCL | O_TRUNC)), iter_fd,
- NULL, NULL);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR, "failed to send open "
- "the fd (%p, flags=0%o) on file %s @ %s",
- iter_fd, iter_fd->flags, path, dst_node->name);
- ret = -1;
- open_failed = 1;
- }
+ if (fd_is_anonymous(iter_fd) ||
+ (dht_fd_open_on_dst(this, iter_fd, dst_node))) {
+ if (!tmp) {
+ iter_fd = list_entry(iter_fd->inode_list.next, typeof(*iter_fd),
+ inode_list);
+ continue;
+ }
+ skip_open = _gf_true;
}
- GF_FREE (path);
- SYNCTASK_SETID (frame->root->uid, frame->root->gid);
+ /* Yes, this is ugly but there isn't a cleaner way to do this
+ * the fd_ref is an atomic increment so not too bad. We want to
+ * reduce the number of inode locks and unlocks.
+ */
- if (open_failed) {
- ret = -1;
- goto out;
- }
+ fd_ref(iter_fd);
+ UNLOCK(&inode->lock);
-done:
- ret = dht_inode_ctx_set1 (this, inode, dst_node);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: failed to set inode-ctx target file at %s",
- local->loc.path, dst_node->name);
- goto out;
+ if (tmp) {
+ fd_unref(tmp);
+ tmp = NULL;
}
+ if (skip_open)
+ goto next;
- ret = 0;
+ /* flags for open are stripped down to allow following the
+ * new location of the file, otherwise we can get EEXIST or
+ * truncate the file again as rebalance is moving the data */
+ ret = syncop_open(dst_node, &tmp_loc,
+ (iter_fd->flags & ~(O_CREAT | O_EXCL | O_TRUNC)),
+ iter_fd, NULL, NULL);
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_OPEN_FD_ON_DST_FAILED, "fd=%p", iter_fd,
+ "flags=0%o", iter_fd->flags, "path=%s", path, "name=%s",
+ dst_node->name, NULL);
+ ret = -1;
+ open_failed = 1;
+ } else {
+ /* Potential fd leak if this fails here as it will be
+ reopened at the next Phase1/2 check */
+ dht_fd_ctx_set(this, iter_fd, dst_node);
+ }
+
+ next:
+ LOCK(&inode->lock);
+ skip_open = _gf_false;
+ tmp = iter_fd;
+ iter_fd = list_entry(tmp->inode_list.next, typeof(*tmp), inode_list);
+ }
+
+ SYNCTASK_SETID(frame->root->uid, frame->root->gid);
+
+unlock:
+ UNLOCK(&inode->lock);
+
+ if (tmp) {
+ fd_unref(tmp);
+ tmp = NULL;
+ }
+ if (open_failed) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dht_inode_ctx_set_mig_info(this, inode, src_node, dst_node);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SET_INODE_CTX_FAILED,
+ "path=%s", local->loc.path, "name=%s", dst_node->name, NULL);
+ goto out;
+ }
+
+ ret = 0;
out:
- return ret;
+ if (dict) {
+ dict_unref(dict);
+ }
+
+ loc_wipe(&tmp_loc);
+ return ret;
}
int
-dht_rebalance_in_progress_check (xlator_t *this, call_frame_t *frame)
+dht_rebalance_in_progress_check(xlator_t *this, call_frame_t *frame)
{
+ int ret = -1;
- int ret = -1;
-
- ret = synctask_new (this->ctx->env, dht_rebalance_inprogress_task,
- dht_inprogress_check_done,
- frame, frame);
- return ret;
+ ret = synctask_new(this->ctx->env, dht_rebalance_inprogress_task,
+ dht_inprogress_check_done, frame, frame);
+ return ret;
}
int
-dht_inode_ctx_layout_set (inode_t *inode, xlator_t *this,
- dht_layout_t *layout_int)
+dht_inode_ctx_layout_set(inode_t *inode, xlator_t *this,
+ dht_layout_t *layout_int)
{
- dht_inode_ctx_t *ctx = NULL;
- int ret = -1;
-
- ret = dht_inode_ctx_get (inode, this, &ctx);
- if (!ret && ctx) {
- ctx->layout = layout_int;
- } else {
- ctx = GF_CALLOC (1, sizeof (*ctx), gf_dht_mt_inode_ctx_t);
- if (!ctx)
- return ret;
- ctx->layout = layout_int;
- }
-
- ret = dht_inode_ctx_set (inode, this, ctx);
-
- return ret;
+ dht_inode_ctx_t *ctx = NULL;
+ int ret = -1;
+
+ ret = dht_inode_ctx_get(inode, this, &ctx);
+ if (!ret && ctx) {
+ ctx->layout = layout_int;
+ } else {
+ ctx = GF_CALLOC(1, sizeof(*ctx), gf_dht_mt_inode_ctx_t);
+ if (!ctx)
+ return ret;
+ ctx->layout = layout_int;
+ }
+
+ ret = dht_inode_ctx_set(inode, this, ctx);
+
+ return ret;
}
-
void
-dht_inode_ctx_time_set (inode_t *inode, xlator_t *this, struct iatt *stat)
+dht_inode_ctx_time_set(inode_t *inode, xlator_t *this, struct iatt *stat)
{
- dht_inode_ctx_t *ctx = NULL;
- dht_stat_time_t *time = 0;
- int ret = -1;
+ dht_inode_ctx_t *ctx = NULL;
+ dht_stat_time_t *time = 0;
+ int ret = -1;
- ret = dht_inode_ctx_get (inode, this, &ctx);
+ ret = dht_inode_ctx_get(inode, this, &ctx);
- if (ret)
- return;
+ if (ret)
+ return;
- time = &ctx->time;
+ time = &ctx->time;
- time->mtime = stat->ia_mtime;
- time->mtime_nsec = stat->ia_mtime_nsec;
+ time->mtime = stat->ia_mtime;
+ time->mtime_nsec = stat->ia_mtime_nsec;
- time->ctime = stat->ia_ctime;
- time->ctime_nsec = stat->ia_ctime_nsec;
+ time->ctime = stat->ia_ctime;
+ time->ctime_nsec = stat->ia_ctime_nsec;
- time->atime = stat->ia_atime;
- time->atime_nsec = stat->ia_atime_nsec;
+ time->atime = stat->ia_atime;
+ time->atime_nsec = stat->ia_atime_nsec;
- return;
+ return;
}
-
int
-dht_inode_ctx_time_update (inode_t *inode, xlator_t *this, struct iatt *stat,
- int32_t post)
+dht_inode_ctx_time_update(inode_t *inode, xlator_t *this, struct iatt *stat,
+ int32_t post)
{
- dht_inode_ctx_t *ctx = NULL;
- dht_stat_time_t *time = 0;
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO (this->name, stat, out);
- GF_VALIDATE_OR_GOTO (this->name, inode, out);
-
- ret = dht_inode_ctx_get (inode, this, &ctx);
-
- if (ret) {
- ctx = GF_CALLOC (1, sizeof (*ctx), gf_dht_mt_inode_ctx_t);
- if (!ctx)
- return -1;
- }
-
- time = &ctx->time;
-
- DHT_UPDATE_TIME(time->mtime, time->mtime_nsec,
- stat->ia_mtime, stat->ia_mtime_nsec, inode, post);
- DHT_UPDATE_TIME(time->ctime, time->ctime_nsec,
- stat->ia_ctime, stat->ia_ctime_nsec, inode, post);
- DHT_UPDATE_TIME(time->atime, time->atime_nsec,
- stat->ia_atime, stat->ia_atime_nsec, inode, post);
-
- ret = dht_inode_ctx_set (inode, this, ctx);
+ dht_inode_ctx_t *ctx = NULL;
+ dht_stat_time_t *time = 0;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO(this->name, stat, out);
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
+
+ ret = dht_inode_ctx_get(inode, this, &ctx);
+
+ if (ret) {
+ ctx = GF_CALLOC(1, sizeof(*ctx), gf_dht_mt_inode_ctx_t);
+ if (!ctx)
+ return -1;
+ }
+
+ time = &ctx->time;
+
+ LOCK(&inode->lock);
+ {
+ DHT_UPDATE_TIME(time->mtime, time->mtime_nsec, stat->ia_mtime,
+ stat->ia_mtime_nsec, post);
+ DHT_UPDATE_TIME(time->ctime, time->ctime_nsec, stat->ia_ctime,
+ stat->ia_ctime_nsec, post);
+ DHT_UPDATE_TIME(time->atime, time->atime_nsec, stat->ia_atime,
+ stat->ia_atime_nsec, post);
+ }
+ UNLOCK(&inode->lock);
+
+ ret = dht_inode_ctx_set(inode, this, ctx);
out:
- return 0;
+ return 0;
}
int
-dht_inode_ctx_get (inode_t *inode, xlator_t *this, dht_inode_ctx_t **ctx)
+dht_inode_ctx_get(inode_t *inode, xlator_t *this, dht_inode_ctx_t **ctx)
{
- int ret = -1;
- uint64_t ctx_int = 0;
-
- GF_VALIDATE_OR_GOTO ("dht", this, out);
- GF_VALIDATE_OR_GOTO (this->name, inode, out);
+ int ret = -1;
+ uint64_t ctx_int = 0;
- ret = inode_ctx_get (inode, this, &ctx_int);
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
- if (ret)
- return ret;
+ ret = inode_ctx_get(inode, this, &ctx_int);
- if (ctx)
- *ctx = (dht_inode_ctx_t *) ctx_int;
-out:
+ if (ret)
return ret;
-}
-int dht_inode_ctx_set (inode_t *inode, xlator_t *this, dht_inode_ctx_t *ctx)
-{
- int ret = -1;
- uint64_t ctx_int = 0;
-
- GF_VALIDATE_OR_GOTO ("dht", this, out);
- GF_VALIDATE_OR_GOTO (this->name, inode, out);
- GF_VALIDATE_OR_GOTO (this->name, ctx, out);
-
- ctx_int = (long)ctx;
- ret = inode_ctx_set (inode, this, &ctx_int);
+ if (ctx)
+ *ctx = (dht_inode_ctx_t *)(uintptr_t)ctx_int;
out:
- return ret;
+ return ret;
}
-void
-dht_set_lkowner (dht_lock_t **lk_array, int count, gf_lkowner_t *lkowner)
+int
+dht_inode_ctx_set(inode_t *inode, xlator_t *this, dht_inode_ctx_t *ctx)
{
- int i = 0;
-
- if (!lk_array || !lkowner)
- goto out;
+ int ret = -1;
+ uint64_t ctx_int = 0;
- for (i = 0; i < count; i++) {
- lk_array[i]->lk_owner = *lkowner;
- }
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
+ GF_VALIDATE_OR_GOTO(this->name, ctx, out);
+ ctx_int = (long)ctx;
+ ret = inode_ctx_set(inode, this, &ctx_int);
out:
- return;
+ return ret;
}
int
-dht_subvol_status (dht_conf_t *conf, xlator_t *subvol)
+dht_subvol_status(dht_conf_t *conf, xlator_t *subvol)
{
- int i;
+ int i;
- for (i=0 ; i < conf->subvolume_cnt; i++) {
- if (conf->subvolumes[i] == subvol) {
- return conf->subvolume_status[i];
- }
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvolumes[i] == subvol) {
+ return conf->subvolume_status[i];
}
- return 0;
+ }
+ return 0;
}
-void
-dht_inodelk_done (call_frame_t *lock_frame)
+inode_t *
+dht_heal_path(xlator_t *this, char *path, inode_table_t *itable)
{
- fop_inodelk_cbk_t inodelk_cbk = NULL;
- call_frame_t *main_frame = NULL;
- dht_local_t *local = NULL;
-
- local = lock_frame->local;
- main_frame = local->main_frame;
-
- local->lock.locks = NULL;
- local->lock.lk_count = 0;
-
- inodelk_cbk = local->lock.inodelk_cbk;
- local->lock.inodelk_cbk = NULL;
-
- inodelk_cbk (main_frame, NULL, main_frame->this, local->lock.op_ret,
- local->lock.op_errno, NULL);
-
- dht_lock_stack_destroy (lock_frame);
- return;
-}
-
-int
-dht_inodelk_cleanup_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *xdata)
-{
- dht_inodelk_done (frame);
- return 0;
-}
-
-int32_t
-dht_lock_count (dht_lock_t **lk_array, int lk_count)
-{
- int i = 0, locked = 0;
-
- if ((lk_array == NULL) || (lk_count == 0))
+ int ret = -1;
+ struct iatt iatt = {
+ 0,
+ };
+ inode_t *linked_inode = NULL;
+ loc_t loc = {
+ 0,
+ };
+ char *bname = NULL;
+ char *save_ptr = NULL;
+ static uuid_t gfid = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
+ char *tmp_path = NULL;
+
+ tmp_path = gf_strdup(path);
+ if (!tmp_path) {
+ goto out;
+ }
+
+ gf_uuid_copy(loc.pargfid, gfid);
+ loc.parent = inode_ref(itable->root);
+
+ bname = strtok_r(tmp_path, "/", &save_ptr);
+
+ /* sending a lookup on parent directory,
+ * Eg: if path is like /a/b/c/d/e/f/g/
+ * then we will send a lookup on a first and then b,c,d,etc
+ */
+
+ while (bname) {
+ linked_inode = NULL;
+ loc.inode = inode_grep(itable, loc.parent, bname);
+ if (loc.inode == NULL) {
+ loc.inode = inode_new(itable);
+ if (loc.inode == NULL) {
+ ret = -ENOMEM;
goto out;
-
- for (i = 0; i < lk_count; i++) {
- if (lk_array[i]->locked)
- locked++;
- }
-out:
- return locked;
-}
-
-void
-dht_inodelk_cleanup (call_frame_t *lock_frame)
-{
- dht_lock_t **lk_array = NULL;
- int lk_count = 0, lk_acquired = 0;
- dht_local_t *local = NULL;
-
- local = lock_frame->local;
-
- lk_array = local->lock.locks;
- lk_count = local->lock.lk_count;
-
- lk_acquired = dht_lock_count (lk_array, lk_count);
- if (lk_acquired != 0) {
- dht_unlock_inodelk (lock_frame, lk_array, lk_count,
- dht_inodelk_cleanup_cbk);
+ }
} else {
- dht_inodelk_done (lock_frame);
- }
-
- return;
-}
-
-int32_t
-dht_unlock_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- dht_local_t *local = NULL;
- int lk_index = 0, call_cnt = 0;
- char gfid[GF_UUID_BUF_SIZE] = {0};
-
- lk_index = (long) cookie;
-
- local = frame->local;
- if (op_ret < 0) {
- uuid_utoa_r (local->lock.locks[lk_index]->loc.gfid,
- gfid);
-
- gf_log (this->name, GF_LOG_WARNING,
- "unlocking failed on %s:%s (%s)",
- local->lock.locks[lk_index]->xl->name,
- gfid, strerror (op_errno));
- } else {
- local->lock.locks[lk_index]->locked = 0;
- }
-
- call_cnt = dht_frame_return (frame);
- if (is_last_call (call_cnt)) {
- dht_inodelk_done (frame);
- }
-
- return 0;
-}
-
-call_frame_t *
-dht_lock_frame (call_frame_t *parent_frame)
-{
- call_frame_t *lock_frame = NULL;
-
- lock_frame = copy_frame (parent_frame);
- if (lock_frame == NULL)
+ /*
+ * Inode is already populated in the inode table.
+ * Which means we already looked up the inode and
+ * linked with a dentry. So that we will skip
+ * lookup on this entry, and proceed to next.
+ */
+ linked_inode = loc.inode;
+ bname = strtok_r(NULL, "/", &save_ptr);
+ if (!bname) {
goto out;
-
- set_lk_owner_from_ptr (&lock_frame->root->lk_owner, parent_frame->root);
-
-out:
- return lock_frame;
-}
-
-int32_t
-dht_unlock_inodelk (call_frame_t *frame, dht_lock_t **lk_array, int lk_count,
- fop_inodelk_cbk_t inodelk_cbk)
-{
- dht_local_t *local = NULL;
- struct gf_flock flock = {0,};
- int ret = -1 , i = 0;
- call_frame_t *lock_frame = NULL;
- int call_cnt = 0;
-
- GF_VALIDATE_OR_GOTO ("dht-locks", frame, done);
- GF_VALIDATE_OR_GOTO (frame->this->name, lk_array, done);
- GF_VALIDATE_OR_GOTO (frame->this->name, inodelk_cbk, done);
-
- call_cnt = dht_lock_count (lk_array, lk_count);
- if (call_cnt == 0) {
- ret = 0;
- goto done;
- }
-
- lock_frame = dht_lock_frame (frame);
- if (lock_frame == NULL) {
- gf_log (frame->this->name, GF_LOG_WARNING,
- "cannot allocate a frame, not unlocking following "
- "locks:");
-
- dht_log_lk_array (frame->this->name, GF_LOG_WARNING, lk_array,
- lk_count);
- goto done;
+ }
+ inode_unref(loc.parent);
+ loc.parent = loc.inode;
+ gf_uuid_copy(loc.pargfid, loc.inode->gfid);
+ loc.inode = NULL;
+ continue;
}
- ret = dht_local_lock_init (lock_frame, lk_array, lk_count, inodelk_cbk);
- if (ret < 0) {
- gf_log (frame->this->name, GF_LOG_WARNING,
- "storing locks in local failed, not unlocking "
- "following locks:");
-
- dht_log_lk_array (frame->this->name, GF_LOG_WARNING, lk_array,
- lk_count);
-
- goto done;
- }
-
- local = lock_frame->local;
- local->main_frame = frame;
- local->call_cnt = call_cnt;
-
- flock.l_type = F_UNLCK;
-
- for (i = 0; i < local->lock.lk_count; i++) {
- if (!local->lock.locks[i]->locked)
- continue;
+ loc.name = bname;
+ ret = loc_path(&loc, bname);
- lock_frame->root->lk_owner = local->lock.locks[i]->lk_owner;
- STACK_WIND_COOKIE (lock_frame, dht_unlock_inodelk_cbk,
- (void *)(long)i,
- local->lock.locks[i]->xl,
- local->lock.locks[i]->xl->fops->inodelk,
- local->lock.locks[i]->domain,
- &local->lock.locks[i]->loc, F_SETLK,
- &flock, NULL);
+ ret = syncop_lookup(this, &loc, &iatt, NULL, NULL, NULL);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_INFO, -ret, DHT_MSG_DIR_SELFHEAL_FAILED,
+ "path=%s", path, "subvolume=%s", this->name, "bname=%s",
+ bname, NULL);
+ goto out;
}
- return 0;
-
-done:
- if (lock_frame)
- dht_lock_stack_destroy (lock_frame);
-
- /* no locks acquired, invoke inodelk_cbk */
- if (ret == 0)
- inodelk_cbk (frame, NULL, frame->this, 0, 0, NULL);
+ linked_inode = inode_link(loc.inode, loc.parent, bname, &iatt);
+ if (!linked_inode)
+ goto out;
- return ret;
-}
-
-int32_t
-dht_nonblocking_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- dht_local_t *local = NULL;
- int lk_index = 0, call_cnt = 0;
- char gfid[GF_UUID_BUF_SIZE] = {0};
-
- local = frame->local;
- lk_index = (long) cookie;
-
- if (op_ret == -1) {
- local->lock.op_ret = -1;
- local->lock.op_errno = op_errno;
-
- if (local && local->lock.locks[lk_index]) {
- uuid_utoa_r (local->lock.locks[lk_index]->loc.inode->gfid,
- gfid);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "inodelk failed on gfid: %s "
- "subvolume: %s (%s)", gfid,
- local->lock.locks[lk_index]->xl->name,
- strerror (op_errno));
- }
-
- goto out;
- }
-
- local->lock.locks[lk_index]->locked = _gf_true;
+ loc_wipe(&loc);
+ gf_uuid_copy(loc.pargfid, linked_inode->gfid);
+ loc.inode = NULL;
+ bname = strtok_r(NULL, "/", &save_ptr);
+ if (bname)
+ loc.parent = linked_inode;
+ }
out:
- call_cnt = dht_frame_return (frame);
- if (is_last_call (call_cnt)) {
- if (local->lock.op_ret < 0) {
- dht_inodelk_cleanup (frame);
- return 0;
- }
+ inode_ref(linked_inode);
+ loc_wipe(&loc);
+ GF_FREE(tmp_path);
- dht_inodelk_done (frame);
- }
-
- return 0;
+ return linked_inode;
}
int
-dht_nonblocking_inodelk (call_frame_t *frame, dht_lock_t **lk_array,
- int lk_count, fop_inodelk_cbk_t inodelk_cbk)
+dht_heal_full_path(void *data)
{
- struct gf_flock flock = {0,};
- int i = 0, ret = 0;
- dht_local_t *local = NULL;
- call_frame_t *lock_frame = NULL;
-
- GF_VALIDATE_OR_GOTO ("dht-locks", frame, out);
- GF_VALIDATE_OR_GOTO (frame->this->name, lk_array, out);
- GF_VALIDATE_OR_GOTO (frame->this->name, inodelk_cbk, out);
-
- lock_frame = dht_lock_frame (frame);
- if (lock_frame == NULL)
- goto out;
-
- ret = dht_local_lock_init (lock_frame, lk_array, lk_count, inodelk_cbk);
- if (ret < 0) {
- goto out;
- }
-
- dht_set_lkowner (lk_array, lk_count, &lock_frame->root->lk_owner);
-
- local = lock_frame->local;
- local->main_frame = frame;
-
- local->call_cnt = lk_count;
-
- for (i = 0; i < local->lock.lk_count; i++) {
- flock.l_type = local->lock.locks[i]->type;
-
- STACK_WIND_COOKIE (lock_frame, dht_nonblocking_inodelk_cbk,
- (void *) (long) i,
- local->lock.locks[i]->xl,
- local->lock.locks[i]->xl->fops->inodelk,
- local->lock.locks[i]->domain,
- &local->lock.locks[i]->loc, F_SETLK,
- &flock, NULL);
+ call_frame_t *heal_frame = data;
+ dht_local_t *local = NULL;
+ loc_t loc = {
+ 0,
+ };
+ dict_t *dict = NULL;
+ char *path = NULL;
+ int ret = -1;
+ xlator_t *source = NULL;
+ xlator_t *this = NULL;
+ inode_table_t *itable = NULL;
+ inode_t *inode = NULL;
+ inode_t *tmp_inode = NULL;
+
+ GF_VALIDATE_OR_GOTO("DHT", heal_frame, out);
+
+ local = heal_frame->local;
+ this = heal_frame->this;
+ source = heal_frame->cookie;
+ heal_frame->cookie = NULL;
+ gf_uuid_copy(loc.gfid, local->gfid);
+
+ if (local->loc.inode)
+ loc.inode = inode_ref(local->loc.inode);
+ else
+ goto out;
+
+ itable = loc.inode->table;
+ ret = syncop_getxattr(source, &loc, &dict, GET_ANCESTRY_PATH_KEY, NULL,
+ NULL);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_INFO, -ret, DHT_MSG_DIR_HEAL_ABORT,
+ "subvol=%s", source->name, NULL);
+ goto out;
+ }
+
+ ret = dict_get_str(dict, GET_ANCESTRY_PATH_KEY, &path);
+ if (path) {
+ inode = dht_heal_path(this, path, itable);
+ if (inode && inode != local->inode) {
+ /*
+ * if inode returned by heal function is different
+ * from what we passed, which means a racing thread
+ * already linked a different inode for dentry.
+ * So we will update our local->inode, so that we can
+ * retrurn proper inode.
+ */
+ tmp_inode = local->inode;
+ local->inode = inode;
+ inode_unref(tmp_inode);
+ tmp_inode = NULL;
+ } else {
+ inode_unref(inode);
}
-
- return 0;
+ }
out:
- if (lock_frame)
- dht_lock_stack_destroy (lock_frame);
-
- return -1;
+ loc_wipe(&loc);
+ if (dict)
+ dict_unref(dict);
+ return 0;
}
-int32_t
-dht_blocking_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+int
+dht_heal_full_path_done(int op_ret, call_frame_t *heal_frame, void *data)
{
- int lk_index = 0;
- dht_local_t *local = NULL;
-
- lk_index = (long) cookie;
-
- local = frame->local;
-
- if (op_ret == 0) {
- local->lock.locks[lk_index]->locked = _gf_true;
- } else {
- local->lock.op_ret = -1;
- local->lock.op_errno = op_errno;
- goto cleanup;
- }
-
- if (lk_index == (local->lock.lk_count - 1)) {
- dht_inodelk_done (frame);
- } else {
- dht_blocking_inodelk_rec (frame, ++lk_index);
+ call_frame_t *main_frame = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *this = NULL;
+ int ret = -1;
+ int op_errno = 0;
+
+ local = heal_frame->local;
+ main_frame = local->main_frame;
+ local->main_frame = NULL;
+ this = heal_frame->this;
+
+ dht_set_fixed_dir_stat(&local->postparent);
+ if (local->need_xattr_heal) {
+ local->need_xattr_heal = 0;
+ ret = dht_dir_xattr_heal(this, local, &op_errno);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_DIR_XATTR_HEAL_FAILED, "path=%s", local->loc.path,
+ NULL);
}
+ }
- return 0;
+ DHT_STACK_UNWIND(lookup, main_frame, 0, 0, local->inode, &local->stbuf,
+ local->xattr, &local->postparent);
-cleanup:
- dht_inodelk_cleanup (frame);
-
- return 0;
+ DHT_STACK_DESTROY(heal_frame);
+ return 0;
}
-void
-dht_blocking_inodelk_rec (call_frame_t *frame, int i)
+/* This function must be called inside an inode lock */
+int
+__dht_lock_subvol_set(inode_t *inode, xlator_t *this, xlator_t *lock_subvol)
{
- dht_local_t *local = NULL;
- struct gf_flock flock = {0,};
+ dht_inode_ctx_t *ctx = NULL;
+ int ret = -1;
+ uint64_t value = 0;
- local = frame->local;
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
- flock.l_type = local->lock.locks[i]->type;
-
- STACK_WIND_COOKIE (frame, dht_blocking_inodelk_cbk,
- (void *) (long) i,
- local->lock.locks[i]->xl,
- local->lock.locks[i]->xl->fops->inodelk,
- local->lock.locks[i]->domain,
- &local->lock.locks[i]->loc, F_SETLKW, &flock, NULL);
+ ret = __inode_ctx_get0(inode, this, &value);
+ if (ret || !value) {
+ return -1;
+ }
- return;
+ ctx = (dht_inode_ctx_t *)(uintptr_t)value;
+ ctx->lock_subvol = lock_subvol;
+out:
+ return ret;
}
-int
-dht_lock_request_cmp (const void *val1, const void *val2)
+xlator_t *
+dht_get_lock_subvolume(xlator_t *this, struct gf_flock *lock,
+ dht_local_t *local)
{
- dht_lock_t *lock1 = NULL;
- dht_lock_t *lock2 = NULL;
- int ret = 0;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ int32_t ret = -1;
+ uint64_t value = 0;
+ xlator_t *cached_subvol = NULL;
+ dht_inode_ctx_t *ctx = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
- lock1 = *(dht_lock_t **)val1;
- lock2 = *(dht_lock_t **)val2;
+ GF_VALIDATE_OR_GOTO(this->name, lock, out);
+ GF_VALIDATE_OR_GOTO(this->name, local, out);
- GF_VALIDATE_OR_GOTO ("dht-locks", lock1, out);
- GF_VALIDATE_OR_GOTO ("dht-locks", lock2, out);
+ cached_subvol = local->cached_subvol;
- ret = strcmp (lock1->xl->name, lock2->xl->name);
-
- if (ret == 0) {
- ret = gf_uuid_compare (lock1->loc.gfid, lock2->loc.gfid);
- }
+ if (local->loc.inode || local->fd) {
+ inode = local->loc.inode ? local->loc.inode : local->fd->inode;
+ }
-out:
- return ret;
-}
+ if (!inode)
+ goto out;
-int
-dht_lock_order_requests (dht_lock_t **locks, int count)
-{
- int ret = -1;
-
- if (!locks || !count)
- goto out;
-
- qsort (locks, count, sizeof (*locks), dht_lock_request_cmp);
- ret = 0;
+ if (!(IA_ISDIR(inode->ia_type) || IA_ISINVAL(inode->ia_type))) {
+ /*
+ * We may get non-linked inode for directories as part
+ * of the selfheal code path. So checking for IA_INVAL
+ * type also. This will only happen for directory.
+ */
+ subvol = local->cached_subvol;
+ goto out;
+ }
+ if (lock->l_type != F_UNLCK) {
+ /*
+ * inode purging might happen on NFS between a lk
+ * and unlk. Due to this lk and unlk might be sent
+ * to different subvols.
+ * So during a lock request, taking a ref on inode
+ * to prevent inode purging. inode unref will happen
+ * in unlock cbk code path.
+ */
+ inode_ref(inode);
+ }
+
+ LOCK(&inode->lock);
+ ret = __inode_ctx_get0(inode, this, &value);
+ if (!ret && value) {
+ ctx = (dht_inode_ctx_t *)(uintptr_t)value;
+ subvol = ctx->lock_subvol;
+ }
+ if (!subvol && lock->l_type != F_UNLCK && cached_subvol) {
+ ret = __dht_lock_subvol_set(inode, this, cached_subvol);
+ if (ret) {
+ gf_uuid_unparse(inode->gfid, gfid);
+ UNLOCK(&inode->lock);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_SET_INODE_CTX_FAILED,
+ "lock_subvol gfid=%s", gfid, NULL);
+ goto post_unlock;
+ }
+ subvol = cached_subvol;
+ }
+ UNLOCK(&inode->lock);
+post_unlock:
+ if (!subvol && inode && lock->l_type != F_UNLCK) {
+ inode_unref(inode);
+ }
out:
- return ret;
+ return subvol;
}
int
-dht_blocking_inodelk (call_frame_t *frame, dht_lock_t **lk_array,
- int lk_count, fop_inodelk_cbk_t inodelk_cbk)
+dht_lk_inode_unref(call_frame_t *frame, int32_t op_ret)
{
- int ret = -1;
- call_frame_t *lock_frame = NULL;
- dht_local_t *local = NULL;
-
- GF_VALIDATE_OR_GOTO ("dht-locks", frame, out);
- GF_VALIDATE_OR_GOTO (frame->this->name, lk_array, out);
- GF_VALIDATE_OR_GOTO (frame->this->name, inodelk_cbk, out);
-
- lock_frame = dht_lock_frame (frame);
- if (lock_frame == NULL)
+ int ret = -1;
+ dht_local_t *local = NULL;
+ inode_t *inode = NULL;
+ xlator_t *this = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ local = frame->local;
+ this = frame->this;
+
+ if (local->loc.inode || local->fd) {
+ inode = local->loc.inode ? local->loc.inode : local->fd->inode;
+ }
+ if (!inode) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LOCK_INODE_UNREF_FAILED,
+ NULL);
+ goto out;
+ }
+
+ if (!(IA_ISDIR(inode->ia_type) || IA_ISINVAL(inode->ia_type))) {
+ ret = 0;
+ goto out;
+ }
+
+ switch (local->lock_type) {
+ case F_RDLCK:
+ case F_WRLCK:
+ if (op_ret) {
+ gf_uuid_unparse(inode->gfid, gfid);
+ gf_msg_debug(this->name, 0, "lock request failed for gfid %s",
+ gfid);
+ inode_unref(inode);
goto out;
-
- ret = dht_local_lock_init (lock_frame, lk_array, lk_count, inodelk_cbk);
- if (ret < 0) {
+ }
+ break;
+
+ case F_UNLCK:
+ if (!op_ret) {
+ inode_unref(inode);
+ } else {
+ gf_uuid_unparse(inode->gfid, gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_LOCK_INODE_UNREF_FAILED, "gfid=%s", gfid, NULL);
goto out;
- }
-
- dht_set_lkowner (lk_array, lk_count, &lock_frame->root->lk_owner);
-
- local = lock_frame->local;
- local->main_frame = frame;
-
- dht_blocking_inodelk_rec (lock_frame, 0);
-
- return 0;
+ }
+ default:
+ break;
+ }
+ ret = 0;
out:
- if (lock_frame)
- dht_lock_stack_destroy (lock_frame);
+ return ret;
+}
- return -1;
+/* Code to update custom extended attributes from src dict to dst dict
+ */
+void
+dht_dir_set_heal_xattr(xlator_t *this, dht_local_t *local, dict_t *dst,
+ dict_t *src, int *uret, int *uflag)
+{
+ int ret = -1;
+ data_t *keyval = NULL;
+ int luret = -1;
+ int luflag = -1;
+ int i = 0;
+ char **xattrs_to_heal;
+
+ if (!src || !dst) {
+ gf_smsg(this->name, GF_LOG_WARNING, EINVAL, DHT_MSG_DST_NULL_SET_FAILED,
+ "path=%s", local->loc.path, NULL);
+ return;
+ }
+ /* Check if any user xattr present in src dict and set
+ it to dst dict
+ */
+ luret = dict_foreach_fnmatch(src, "user.*", dht_set_user_xattr, dst);
+ /* Check if any other custom xattr present in src dict
+ and set it to dst dict, here index start from 1 because
+ user xattr already checked in previous statement
+ */
+
+ xattrs_to_heal = get_xattrs_to_heal();
+
+ for (i = 1; xattrs_to_heal[i]; i++) {
+ keyval = dict_get(src, xattrs_to_heal[i]);
+ if (keyval) {
+ luflag = 1;
+ ret = dict_set(dst, xattrs_to_heal[i], keyval);
+ if (ret)
+ gf_smsg(this->name, GF_LOG_WARNING, ENOMEM,
+ DHT_MSG_DICT_SET_FAILED, "key=%s", xattrs_to_heal[i],
+ "path=%s", local->loc.path, NULL);
+ keyval = NULL;
+ }
+ }
+ if (uret)
+ (*uret) = luret;
+ if (uflag)
+ (*uflag) = luflag;
}
diff --git a/xlators/cluster/dht/src/dht-inode-read.c b/xlators/cluster/dht/src/dht-inode-read.c
index 78e3ef4233b..dbb8070b0da 100644
--- a/xlators/cluster/dht/src/dht-inode-read.c
+++ b/xlators/cluster/dht/src/dht-inode-read.c
@@ -8,1132 +8,1651 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "dht-common.h"
-int dht_access2 (xlator_t *this, call_frame_t *frame, int ret);
-int dht_readv2 (xlator_t *this, call_frame_t *frame, int ret);
-int dht_attr2 (xlator_t *this, call_frame_t *frame, int ret);
-int dht_open2 (xlator_t *this, call_frame_t *frame, int ret);
-int dht_flush2 (xlator_t *this, call_frame_t *frame, int ret);
-int dht_lk2 (xlator_t *this, call_frame_t *frame, int ret);
-int dht_fsync2 (xlator_t *this, call_frame_t *frame, int ret);
-
-int
-dht_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, fd_t *fd, dict_t *xdata)
+static int
+dht_access2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret);
+static int
+dht_readv2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret);
+static int
+dht_attr2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret);
+static int
+dht_open2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret);
+static int
+dht_flush2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret);
+static int
+dht_lk2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret);
+static int
+dht_fsync2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret);
+static int
+dht_common_xattrop2(xlator_t *this, xlator_t *subvol, call_frame_t *frame,
+ int ret);
+
+static int
+dht_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, fd_t *fd, dict_t *xdata)
{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- int ret = 0;
-
- local = frame->local;
- prev = cookie;
-
- local->op_errno = op_errno;
- if ((op_ret == -1) && !dht_inode_missing(op_errno)) {
- gf_msg_debug (this->name, 0,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
- goto out;
- }
-
- if (!op_ret || (local->call_cnt != 1))
- goto out;
-
- /* rebalance would have happened */
- local->rebalance.target_op_fn = dht_open2;
- ret = dht_rebalance_complete_check (this, frame);
- if (!ret)
- return 0;
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
+ int ret = 0;
+
+ local = frame->local;
+ prev = cookie;
+
+ local->op_errno = op_errno;
+ if ((op_ret == -1) && !dht_inode_missing(op_errno)) {
+ gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
+ prev->name);
+ goto out;
+ }
+
+ /* Update ctx if the fd has been opened on the target*/
+ if (!op_ret && (local->call_cnt == 1)) {
+ dht_fd_ctx_set(this, fd, prev);
+ goto out;
+ }
+
+ if (!op_ret || (local->call_cnt != 1))
+ goto out;
+
+ /* rebalance would have happened */
+ local->rebalance.target_op_fn = dht_open2;
+ ret = dht_rebalance_complete_check(this, frame);
+ if (!ret)
+ return 0;
out:
- DHT_STACK_UNWIND (open, frame, op_ret, op_errno, local->fd, xdata);
+ DHT_STACK_UNWIND(open, frame, op_ret, op_errno, local->fd, xdata);
- return 0;
+ return 0;
}
-int
-dht_open2 (xlator_t *this, call_frame_t *frame, int op_ret)
+static int
+dht_open2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
- dht_local_t *local = NULL;
- xlator_t *subvol = NULL;
- int op_errno = EINVAL;
-
- local = frame->local;
- if (!local)
- goto out;
+ dht_local_t *local = NULL;
+ int op_errno = EINVAL;
- op_errno = ENOENT;
- if (op_ret)
- goto out;
+ if (!frame || !frame->local)
+ goto out;
- local->call_cnt = 2;
- subvol = local->cached_subvol;
+ local = frame->local;
+ op_errno = local->op_errno;
- STACK_WIND (frame, dht_open_cbk, subvol, subvol->fops->open,
- &local->loc, local->rebalance.flags, local->fd,
- NULL);
+ if (we_are_not_migrating(ret)) {
+ /* This DHT layer is not migrating the file */
+ DHT_STACK_UNWIND(open, frame, -1, local->op_errno, NULL,
+ local->rebalance.xdata);
return 0;
+ }
+
+ if (subvol == NULL)
+ goto out;
+
+ local->call_cnt = 2;
+
+ STACK_WIND_COOKIE(frame, dht_open_cbk, subvol, subvol, subvol->fops->open,
+ &local->loc, local->rebalance.flags, local->fd,
+ local->xattr_req);
+ return 0;
out:
- DHT_STACK_UNWIND (stat, frame, -1, op_errno, NULL, NULL);
- return 0;
+ DHT_STACK_UNWIND(open, frame, -1, op_errno, NULL, NULL);
+ return 0;
}
int
-dht_open (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int flags, fd_t *fd, dict_t *xdata)
+dht_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, fd_t *fd,
+ dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
- local = dht_local_init (frame, loc, fd, GF_FOP_OPEN);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
- subvol = local->cached_subvol;
- if (!subvol) {
- gf_msg_debug (this->name, 0,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
- local->rebalance.flags = flags;
- local->call_cnt = 1;
+ local = dht_local_init(frame, loc, fd, GF_FOP_OPEN);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- STACK_WIND (frame, dht_open_cbk, subvol, subvol->fops->open,
- loc, flags, fd, xdata);
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
- return 0;
+ local->rebalance.flags = flags;
+ local->call_cnt = 1;
+
+ STACK_WIND_COOKIE(frame, dht_open_cbk, subvol, subvol, subvol->fops->open,
+ loc, flags, fd, xdata);
+
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (open, frame, -1, op_errno, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(open, frame, -1, op_errno, NULL, NULL);
- return 0;
+ return 0;
}
int
-dht_file_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *stbuf, dict_t *xdata)
+dht_file_attr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *stbuf, dict_t *xdata)
{
- xlator_t *subvol = 0;
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- int ret = -1;
- inode_t *inode = NULL;
-
- GF_VALIDATE_OR_GOTO ("dht", frame, err);
- GF_VALIDATE_OR_GOTO ("dht", this, out);
- GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
- GF_VALIDATE_OR_GOTO ("dht", cookie, out);
-
- local = frame->local;
- prev = cookie;
-
- if ((op_ret == -1) && !dht_inode_missing(op_errno)) {
- local->op_errno = op_errno;
- gf_msg_debug (this->name, 0,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
- goto out;
- }
-
- if (local->call_cnt != 1)
- goto out;
+ xlator_t *subvol1 = 0;
+ xlator_t *subvol2 = 0;
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
+ int ret = -1;
+ inode_t *inode = NULL;
+
+ GF_VALIDATE_OR_GOTO("dht", frame, err);
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO("dht", cookie, out);
+
+ local = frame->local;
+ prev = cookie;
+
+ if ((local->fop == GF_FOP_FSTAT) &&
+ dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto out;
+ return 0;
+ }
+ if ((op_ret == -1) && !dht_inode_missing(op_errno)) {
local->op_errno = op_errno;
- /* Check if the rebalance phase2 is true */
- if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (stbuf)) {
- inode = (local->fd) ? local->fd->inode : local->loc.inode;
- ret = dht_inode_ctx_get1 (this, inode, &subvol);
- if (!subvol) {
- /* Phase 2 of migration */
- local->rebalance.target_op_fn = dht_attr2;
- ret = dht_rebalance_complete_check (this, frame);
- if (!ret)
- return 0;
- } else {
- /* value is already set in fd_ctx, that means no need
- to check for whether its complete or not. */
- dht_attr2 (this, frame, 0);
- return 0;
- }
+ gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
+ prev->name);
+ goto out;
+ }
+
+ if (local->call_cnt != 1)
+ goto out;
+
+ local->op_errno = op_errno;
+ local->op_ret = op_ret;
+
+ /* Check if the rebalance phase2 is true */
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2(stbuf)) {
+ local->rebalance.target_op_fn = dht_attr2;
+ dht_set_local_rebalance(this, local, NULL, NULL, stbuf, xdata);
+ inode = (local->fd) ? local->fd->inode : local->loc.inode;
+
+ dht_inode_ctx_get_mig_info(this, inode, &subvol1, &subvol2);
+ if (dht_mig_info_is_invalid(local->cached_subvol, subvol1, subvol2)) {
+ /* Phase 2 of migration */
+ ret = dht_rebalance_complete_check(this, frame);
+ if (!ret)
+ return 0;
+ } else {
+ /* it is a non-fd op or it is an fd based Fop and
+ opened on the dst.*/
+ if (local->fd && !dht_fd_open_on_dst(this, local->fd, subvol2)) {
+ ret = dht_rebalance_complete_check(this, frame);
+ if (!ret)
+ return 0;
+ } else {
+ dht_attr2(this, subvol2, frame, 0);
+ return 0;
+ }
}
+ }
out:
- DHT_STRIP_PHASE1_FLAGS (stbuf);
- DHT_STACK_UNWIND (stat, frame, op_ret, op_errno, stbuf, xdata);
+ DHT_STRIP_PHASE1_FLAGS(stbuf);
+ DHT_STACK_UNWIND(stat, frame, op_ret, op_errno, stbuf, xdata);
err:
- return 0;
+ return 0;
}
-int
-dht_attr2 (xlator_t *this, call_frame_t *frame, int op_ret)
+static int
+dht_attr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
- dht_local_t *local = NULL;
- xlator_t *subvol = NULL;
- int op_errno = EINVAL;
+ dht_local_t *local = NULL;
+ int op_errno = EINVAL;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ op_errno = local->op_errno;
+
+ if (we_are_not_migrating(ret)) {
+ /* This dht xlator is not migrating the file. Unwind and
+ * pass on the original mode bits so the higher DHT layer
+ * can handle this.
+ */
+ DHT_STACK_UNWIND(stat, frame, local->op_ret, op_errno,
+ &local->rebalance.postbuf, local->rebalance.xdata);
+ return 0;
+ }
- local = frame->local;
- if (!local)
- goto out;
+ if (subvol == NULL)
+ goto out;
- op_errno = local->op_errno;
- if (op_ret == -1)
- goto out;
+ local->call_cnt = 2;
- subvol = local->cached_subvol;
- local->call_cnt = 2;
+ if (local->fop == GF_FOP_FSTAT) {
+ STACK_WIND_COOKIE(frame, dht_file_attr_cbk, subvol, subvol,
+ subvol->fops->fstat, local->fd, local->xattr_req);
+ } else {
+ STACK_WIND_COOKIE(frame, dht_file_attr_cbk, subvol, subvol,
+ subvol->fops->stat, &local->loc, local->xattr_req);
+ }
- if (local->fop == GF_FOP_FSTAT) {
- STACK_WIND (frame, dht_file_attr_cbk, subvol,
- subvol->fops->fstat, local->fd, NULL);
- } else {
- STACK_WIND (frame, dht_file_attr_cbk, subvol,
- subvol->fops->stat, &local->loc, NULL);
- }
- return 0;
+ return 0;
out:
- DHT_STACK_UNWIND (stat, frame, -1, op_errno, NULL, NULL);
- return 0;
+ DHT_STACK_UNWIND(stat, frame, -1, op_errno, NULL, NULL);
+ return 0;
+}
+
+static int
+dht_attr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *stbuf, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ xlator_t *prev = NULL;
+ local = frame->local;
+ prev = cookie;
+
+ LOCK(&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ UNLOCK(&frame->lock);
+ gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
+ prev->name);
+
+ goto post_unlock;
+ }
+
+ dht_iatt_merge(this, &local->stbuf, stbuf);
+
+ local->op_ret = 0;
+ }
+ UNLOCK(&frame->lock);
+post_unlock:
+ this_call_cnt = dht_frame_return(frame);
+ if (is_last_call(this_call_cnt)) {
+ DHT_STACK_UNWIND(stat, frame, local->op_ret, local->op_errno,
+ &local->stbuf, xdata);
+ }
+
+ return 0;
}
int
-dht_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *stbuf, dict_t *xdata)
+dht_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int i = 0;
+ int call_cnt = 0;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+ VALIDATE_OR_GOTO(loc->inode, err);
+ VALIDATE_OR_GOTO(loc->path, err);
+
+ local = dht_local_init(frame, loc, NULL, GF_FOP_STAT);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ layout = local->layout;
+ if (!layout) {
+ gf_msg_debug(this->name, 0, "no layout for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
+
+ if (IA_ISREG(loc->inode->ia_type)) {
+ local->call_cnt = 1;
- GF_VALIDATE_OR_GOTO ("dht", frame, err);
- GF_VALIDATE_OR_GOTO ("dht", this, out);
- GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
- GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+ subvol = local->cached_subvol;
- local = frame->local;
- prev = cookie;
+ STACK_WIND_COOKIE(frame, dht_file_attr_cbk, subvol, subvol,
+ subvol->fops->stat, loc, xdata);
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_msg_debug (this->name, 0,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
+ return 0;
+ }
- goto unlock;
- }
+ local->call_cnt = call_cnt = layout->cnt;
- dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
+ for (i = 0; i < call_cnt; i++) {
+ subvol = layout->list[i].xlator;
+
+ STACK_WIND_COOKIE(frame, dht_attr_cbk, subvol, subvol,
+ subvol->fops->stat, loc, xdata);
+ }
+
+ return 0;
- local->op_ret = 0;
- }
-unlock:
- UNLOCK (&frame->lock);
-out:
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- DHT_STACK_UNWIND (stat, frame, local->op_ret, local->op_errno,
- &local->stbuf, xdata);
- }
err:
- return 0;
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(stat, frame, -1, op_errno, NULL, NULL);
+
+ return 0;
}
int
-dht_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+dht_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
- dht_layout_t *layout = NULL;
- int i = 0;
- int call_cnt = 0;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
-
- local = dht_local_init (frame, loc, NULL, GF_FOP_STAT);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int i = 0;
+ int call_cnt = 0;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
+
+ local = dht_local_init(frame, NULL, fd, GF_FOP_FSTAT);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ layout = local->layout;
+ if (!layout) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "no layout for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
+
+ if (IA_ISREG(fd->inode->ia_type)) {
+ local->call_cnt = 1;
- layout = local->layout;
- if (!layout) {
- gf_msg_debug (this->name, 0,
- "no layout for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ subvol = local->cached_subvol;
- if (IA_ISREG (loc->inode->ia_type)) {
- local->call_cnt = 1;
+ STACK_WIND_COOKIE(frame, dht_file_attr_cbk, subvol, subvol,
+ subvol->fops->fstat, fd, xdata);
+ return 0;
+ }
+
+ local->call_cnt = call_cnt = layout->cnt;
+
+ for (i = 0; i < call_cnt; i++) {
+ subvol = layout->list[i].xlator;
+ STACK_WIND_COOKIE(frame, dht_attr_cbk, subvol, subvol,
+ subvol->fops->fstat, fd, xdata);
+ }
- subvol = local->cached_subvol;
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(fstat, frame, -1, op_errno, NULL, NULL);
- STACK_WIND (frame, dht_file_attr_cbk, subvol,
- subvol->fops->stat, loc, xdata);
+ return 0;
+}
+int
+dht_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iovec *vector, int count, struct iatt *stbuf,
+ struct iobref *iobref, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int ret = 0;
+ xlator_t *src_subvol = 0;
+ xlator_t *dst_subvol = 0;
+
+ local = frame->local;
+ if (!local) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ /* This is already second try, no need for re-check */
+ if (local->call_cnt != 1)
+ goto out;
+
+ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto out;
+ return 0;
+ }
+
+ if ((op_ret == -1) && !dht_inode_missing(op_errno))
+ goto out;
+
+ local->op_errno = op_errno;
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2(stbuf)) {
+ local->op_ret = op_ret;
+ local->rebalance.target_op_fn = dht_readv2;
+ dht_set_local_rebalance(this, local, NULL, NULL, stbuf, xdata);
+ /* File would be migrated to other node */
+ ret = dht_inode_ctx_get_mig_info(this, local->fd->inode, &src_subvol,
+ &dst_subvol);
+
+ if (dht_mig_info_is_invalid(local->cached_subvol, src_subvol,
+ dst_subvol) ||
+ !dht_fd_open_on_dst(this, local->fd, dst_subvol)) {
+ ret = dht_rebalance_complete_check(this, frame);
+ if (!ret)
return 0;
+ } else {
+ /* value is already set in fd_ctx, that means no need
+ to check for whether its complete or not. */
+ dht_readv2(this, dst_subvol, frame, 0);
+ return 0;
}
+ }
- local->call_cnt = call_cnt = layout->cnt;
+out:
+ DHT_STRIP_PHASE1_FLAGS(stbuf);
- for (i = 0; i < call_cnt; i++) {
- subvol = layout->list[i].xlator;
+ DHT_STACK_UNWIND(readv, frame, op_ret, op_errno, vector, count, stbuf,
+ iobref, xdata);
- STACK_WIND (frame, dht_attr_cbk,
- subvol, subvol->fops->stat,
- loc, xdata);
- }
+ return 0;
+}
+static int
+dht_readv2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
+{
+ dht_local_t *local = NULL;
+ int op_errno = EINVAL;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ op_errno = local->op_errno;
+
+ if (we_are_not_migrating(ret)) {
+ /* This dht xlator is not migrating the file. Unwind and
+ * pass on the original mode bits so the higher DHT layer
+ * can handle this.
+ */
+ DHT_STACK_UNWIND(readv, frame, local->op_ret, op_errno, NULL, 0,
+ &local->rebalance.postbuf, NULL,
+ local->rebalance.xdata);
return 0;
+ }
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (stat, frame, -1, op_errno, NULL, NULL);
+ if (subvol == NULL)
+ goto out;
- return 0;
-}
+ local->call_cnt = 2;
+
+ STACK_WIND(frame, dht_readv_cbk, subvol, subvol->fops->readv, local->fd,
+ local->rebalance.size, local->rebalance.offset,
+ local->rebalance.flags, local->xattr_req);
+ return 0;
+
+out:
+ DHT_STACK_UNWIND(readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL);
+ return 0;
+}
int
-dht_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+dht_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, off_t off,
+ uint32_t flags, dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
- dht_layout_t *layout = NULL;
- int i = 0;
- int call_cnt = 0;
-
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
- local = dht_local_init (frame, NULL, fd, GF_FOP_FSTAT);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
- layout = local->layout;
- if (!layout) {
- gf_msg_debug (this->name, 0,
- "no layout for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
- if (IA_ISREG (fd->inode->ia_type)) {
- local->call_cnt = 1;
+ local = dht_local_init(frame, NULL, fd, GF_FOP_READ);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- subvol = local->cached_subvol;
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
- STACK_WIND (frame, dht_file_attr_cbk, subvol,
- subvol->fops->fstat, fd, xdata);
-
- return 0;
- }
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
- local->call_cnt = call_cnt = layout->cnt;
+ local->rebalance.offset = off;
+ local->rebalance.size = size;
+ local->rebalance.flags = flags;
+ local->call_cnt = 1;
- for (i = 0; i < call_cnt; i++) {
- subvol = layout->list[i].xlator;
- STACK_WIND (frame, dht_attr_cbk,
- subvol, subvol->fops->fstat,
- fd, xdata);
- }
+ STACK_WIND(frame, dht_readv_cbk, subvol, subvol->fops->readv, local->fd,
+ local->rebalance.size, local->rebalance.offset,
+ local->rebalance.flags, local->xattr_req);
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (fstat, frame, -1, op_errno, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL);
- return 0;
+ return 0;
}
-int
-dht_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- struct iovec *vector, int count, struct iatt *stbuf,
- struct iobref *iobref, dict_t *xdata)
+static int
+dht_access_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, dict_t *xdata)
{
- dht_local_t *local = NULL;
- int ret = 0;
- inode_t *inode = NULL;
- xlator_t *subvol = 0;
-
- local = frame->local;
- if (!local) {
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
-
- /* This is already second try, no need for re-check */
- if (local->call_cnt != 1)
- goto out;
+ int ret = -1;
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ xlator_t *prev = NULL;
+
+ local = frame->local;
+ prev = cookie;
+
+ if (!prev)
+ goto out;
+ if (local->call_cnt != 1)
+ goto out;
+ if ((op_ret == -1) &&
+ ((op_errno == ENOTCONN) || dht_inode_missing(op_errno)) &&
+ IA_ISDIR(local->loc.inode->ia_type)) {
+ subvol = dht_subvol_next_available(this, prev);
+ if (!subvol)
+ goto out;
- if ((op_ret == -1) && !dht_inode_missing(op_errno))
- goto out;
+ /* check if we are done with visiting every node */
+ if (subvol == local->cached_subvol) {
+ goto out;
+ }
+ STACK_WIND_COOKIE(frame, dht_access_cbk, subvol, subvol,
+ subvol->fops->access, &local->loc,
+ local->rebalance.flags, NULL);
+ return 0;
+ }
+ if ((op_ret == -1) && dht_inode_missing(op_errno) &&
+ !(IA_ISDIR(local->loc.inode->ia_type))) {
+ /* File would be migrated to other node */
local->op_errno = op_errno;
- if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (stbuf)) {
- /* File would be migrated to other node */
- ret = dht_inode_ctx_get1 (this, inode, &subvol);
- if (!subvol) {
- local->rebalance.target_op_fn = dht_readv2;
- ret = dht_rebalance_complete_check (this, frame);
- if (!ret)
- return 0;
- } else {
- /* value is already set in fd_ctx, that means no need
- to check for whether its complete or not. */
- dht_readv2 (this, frame, 0);
- return 0;
- }
- }
+ local->rebalance.target_op_fn = dht_access2;
+ ret = dht_rebalance_complete_check(frame->this, frame);
+ if (!ret)
+ return 0;
+ }
out:
- DHT_STRIP_PHASE1_FLAGS (stbuf);
- DHT_STACK_UNWIND (readv, frame, op_ret, op_errno, vector, count, stbuf,
- iobref, xdata);
-
- return 0;
+ DHT_STACK_UNWIND(access, frame, op_ret, op_errno, xdata);
+ return 0;
}
-int
-dht_readv2 (xlator_t *this, call_frame_t *frame, int op_ret)
+static int
+dht_access2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
- dht_local_t *local = NULL;
- xlator_t *subvol = NULL;
- int op_errno = EINVAL;
+ dht_local_t *local = NULL;
+ int op_errno = EINVAL;
- local = frame->local;
- if (!local)
- goto out;
+ local = frame->local;
+ if (!local)
+ goto out;
- op_errno = local->op_errno;
- if (op_ret == -1)
- goto out;
+ op_errno = local->op_errno;
- local->call_cnt = 2;
- subvol = local->cached_subvol;
-
- STACK_WIND (frame, dht_readv_cbk, subvol, subvol->fops->readv,
- local->fd, local->rebalance.size, local->rebalance.offset,
- local->rebalance.flags, NULL);
+ if (we_are_not_migrating(ret)) {
+ /* This dht xlator is not migrating the file. Unwind and
+ * pass on the original mode bits so the higher DHT layer
+ * can handle this.
+ */
+ DHT_STACK_UNWIND(access, frame, -1, op_errno, NULL);
return 0;
+ }
+
+ if (subvol == NULL)
+ goto out;
+
+ local->call_cnt = 2;
+
+ STACK_WIND_COOKIE(frame, dht_access_cbk, subvol, subvol,
+ subvol->fops->access, &local->loc, local->rebalance.flags,
+ local->xattr_req);
+
+ return 0;
out:
- DHT_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL);
- return 0;
+ DHT_STACK_UNWIND(access, frame, -1, op_errno, NULL);
+ return 0;
}
int
-dht_readv (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t off, uint32_t flags, dict_t *xdata)
+dht_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
+ dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
- local = dht_local_init (frame, NULL, fd, GF_FOP_READ);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+ VALIDATE_OR_GOTO(loc->inode, err);
+ VALIDATE_OR_GOTO(loc->path, err);
+
+ local = dht_local_init(frame, loc, NULL, GF_FOP_ACCESS);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->rebalance.flags = mask;
+ local->call_cnt = 1;
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no cached subvolume for path=%s",
+ loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
+
+ STACK_WIND_COOKIE(frame, dht_access_cbk, subvol, subvol,
+ subvol->fops->access, loc, mask, xdata);
+
+ return 0;
- subvol = local->cached_subvol;
- if (!subvol) {
- gf_msg_debug (this->name, 0,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(access, frame, -1, op_errno, NULL);
- local->rebalance.offset = off;
- local->rebalance.size = size;
- local->rebalance.flags = flags;
- local->call_cnt = 1;
+ return 0;
+}
+
+int
+dht_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ xlator_t *subvol = 0;
+ int ret = 0;
+
+ local = frame->local;
- STACK_WIND (frame, dht_readv_cbk,
- subvol, subvol->fops->readv,
- fd, size, off, flags, xdata);
+ local->op_errno = op_errno;
+ if (local->call_cnt != 1)
+ goto out;
+
+ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto out;
return 0;
+ }
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL);
+ local->rebalance.target_op_fn = dht_flush2;
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ /* If context is set, then send flush() it to the destination */
+ dht_inode_ctx_get_mig_info(this, local->fd->inode, NULL, &subvol);
+ if (subvol && dht_fd_open_on_dst(this, local->fd, subvol)) {
+ dht_flush2(this, subvol, frame, 0);
return 0;
-}
+ }
-int
-dht_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xdata)
-{
- int ret = -1;
- dht_local_t *local = NULL;
- xlator_t *subvol = NULL;
- call_frame_t *prev = NULL;
-
- local = frame->local;
- prev = cookie;
-
- if (!prev || !prev->this)
- goto out;
- if (local->call_cnt != 1)
- goto out;
- if ((op_ret == -1) && ((op_errno == ENOTCONN) ||
- dht_inode_missing(op_errno)) &&
- IA_ISDIR(local->loc.inode->ia_type)) {
- subvol = dht_subvol_next_available (this, prev->this);
- if (!subvol)
- goto out;
-
- /* check if we are done with visiting every node */
- if (subvol == local->cached_subvol) {
- goto out;
- }
-
- STACK_WIND (frame, dht_access_cbk, subvol, subvol->fops->access,
- &local->loc, local->rebalance.flags, NULL);
- return 0;
- }
- if ((op_ret == -1) && dht_inode_missing(op_errno) &&
- !(IA_ISDIR(local->loc.inode->ia_type))) {
- /* File would be migrated to other node */
- local->op_errno = op_errno;
- local->rebalance.target_op_fn = dht_access2;
- ret = dht_rebalance_complete_check (frame->this, frame);
- if (!ret)
- return 0;
+ if (op_errno == EREMOTE) {
+ ret = dht_rebalance_complete_check(this, frame);
+ if (!ret) {
+ return 0;
}
+ }
out:
- DHT_STACK_UNWIND (access, frame, op_ret, op_errno, xdata);
- return 0;
+ DHT_STACK_UNWIND(flush, frame, op_ret, op_errno, xdata);
+
+ return 0;
}
-int
-dht_access2 (xlator_t *this, call_frame_t *frame, int op_ret)
+static int
+dht_flush2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
- dht_local_t *local = NULL;
- xlator_t *subvol = NULL;
- int op_errno = EINVAL;
+ dht_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
- local = frame->local;
- if (!local)
- goto out;
+ if ((frame == NULL) || (frame->local == NULL))
+ goto out;
- op_errno = local->op_errno;
- if (op_ret == -1)
- goto out;
+ local = frame->local;
- local->call_cnt = 2;
- subvol = local->cached_subvol;
+ op_errno = local->op_errno;
- STACK_WIND (frame, dht_access_cbk, subvol, subvol->fops->access,
- &local->loc, local->rebalance.flags, NULL);
+ if (subvol == NULL)
+ goto out;
- return 0;
+ local->call_cnt = 2; /* This is the second attempt */
+
+ STACK_WIND(frame, dht_flush_cbk, subvol, subvol->fops->flush, local->fd,
+ local->xattr_req);
+
+ return 0;
out:
- DHT_STACK_UNWIND (access, frame, -1, op_errno, NULL);
- return 0;
+ DHT_STACK_UNWIND(flush, frame, -1, op_errno, NULL);
+ return 0;
}
-
int
-dht_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
- dict_t *xdata)
+dht_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
- local = dht_local_init (frame, loc, NULL, GF_FOP_ACCESS);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
- local->rebalance.flags = mask;
- local->call_cnt = 1;
- subvol = local->cached_subvol;
- if (!subvol) {
- gf_msg_debug (this->name, 0,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
- STACK_WIND (frame, dht_access_cbk, subvol, subvol->fops->access,
- loc, mask, xdata);
+ local = dht_local_init(frame, NULL, fd, GF_FOP_FLUSH);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- return 0;
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
+
+ local->call_cnt = 1;
+
+ STACK_WIND(frame, dht_flush_cbk, subvol, subvol->fops->flush, fd,
+ local->xattr_req);
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (access, frame, -1, op_errno, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(flush, frame, -1, op_errno, NULL);
- return 0;
+ return 0;
}
-
int
-dht_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xdata)
+dht_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- inode_t *inode = NULL;
- xlator_t *subvol = 0;
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
+ int ret = -1;
+ inode_t *inode = NULL;
+ xlator_t *src_subvol = 0;
+ xlator_t *dst_subvol = 0;
+
+ local = frame->local;
+ prev = cookie;
+
+ local->op_errno = op_errno;
+
+ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto out;
+ return 0;
+ }
+
+ if (op_ret == -1 && !dht_inode_missing(op_errno)) {
+ gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
+ prev->name);
+ goto out;
+ }
+
+ if (local->call_cnt != 1) {
+ if (local->stbuf.ia_blocks) {
+ dht_iatt_merge(this, postbuf, &local->stbuf);
+ dht_iatt_merge(this, prebuf, &local->prebuf);
+ }
+ goto out;
+ }
- local = frame->local;
+ local->op_ret = op_ret;
+ inode = local->fd->inode;
- local->op_errno = op_errno;
+ local->rebalance.target_op_fn = dht_fsync2;
+ dht_set_local_rebalance(this, local, NULL, prebuf, postbuf, xdata);
- if (local->call_cnt != 1)
- goto out;
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2(postbuf)) {
+ ret = dht_rebalance_complete_check(this, frame);
+ if (!ret)
+ return 0;
+ }
+
+ /* Check if the rebalance phase1 is true */
+ if (IS_DHT_MIGRATION_PHASE1(postbuf)) {
+ dht_iatt_merge(this, &local->stbuf, postbuf);
+ dht_iatt_merge(this, &local->prebuf, prebuf);
+
+ dht_inode_ctx_get_mig_info(this, inode, &src_subvol, &dst_subvol);
- /* If context is set, then send flush() it to the destination */
- dht_inode_ctx_get1 (this, inode, &subvol);
- if (subvol) {
- dht_flush2 (this, frame, 0);
+ if (dht_mig_info_is_invalid(local->cached_subvol, src_subvol,
+ dst_subvol) ||
+ !dht_fd_open_on_dst(this, local->fd, dst_subvol)) {
+ ret = dht_rebalance_in_progress_check(this, frame);
+ if (!ret)
return 0;
+ } else {
+ dht_fsync2(this, dst_subvol, frame, 0);
+ return 0;
}
+ }
out:
- DHT_STACK_UNWIND (flush, frame, op_ret, op_errno, xdata);
+ DHT_STRIP_PHASE1_FLAGS(postbuf);
+ DHT_STRIP_PHASE1_FLAGS(prebuf);
- return 0;
+ DHT_STACK_UNWIND(fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+
+ return 0;
}
-int
-dht_flush2 (xlator_t *this, call_frame_t *frame, int op_ret)
+static int
+dht_fsync2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
- dht_local_t *local = NULL;
- xlator_t *subvol = NULL;
-
- local = frame->local;
+ dht_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
+
+ if ((frame == NULL) || (frame->local == NULL))
+ goto out;
+
+ local = frame->local;
+ op_errno = local->op_errno;
+
+ if (we_are_not_migrating(ret)) {
+ /* This dht xlator is not migrating the file. Unwind and
+ * pass on the original mode bits so the higher DHT layer
+ * can handle this.
+ */
+ DHT_STACK_UNWIND(fsync, frame, local->op_ret, op_errno,
+ &local->rebalance.prebuf, &local->rebalance.postbuf,
+ local->rebalance.xdata);
+ return 0;
+ }
- dht_inode_ctx_get1 (this, local->fd->inode, &subvol);
+ if (subvol == NULL)
+ goto out;
- if (!subvol)
- subvol = local->cached_subvol;
+ local->call_cnt = 2; /* This is the second attempt */
- local->call_cnt = 2; /* This is the second attempt */
+ STACK_WIND_COOKIE(frame, dht_fsync_cbk, subvol, subvol, subvol->fops->fsync,
+ local->fd, local->rebalance.flags, local->xattr_req);
- STACK_WIND (frame, dht_flush_cbk,
- subvol, subvol->fops->flush, local->fd, NULL);
+ return 0;
- return 0;
+out:
+ DHT_STACK_UNWIND(fsync, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
}
-
int
-dht_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+dht_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync,
+ dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
- local = dht_local_init (frame, NULL, fd, GF_FOP_FLUSH);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
- subvol = local->cached_subvol;
- if (!subvol) {
- gf_msg_debug (this->name, 0,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
- local->call_cnt = 1;
+ local = dht_local_init(frame, NULL, fd, GF_FOP_FSYNC);
+ if (!local) {
+ op_errno = ENOMEM;
- STACK_WIND (frame, dht_flush_cbk,
- subvol, subvol->fops->flush, fd, xdata);
+ goto err;
+ }
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
- return 0;
+ local->call_cnt = 1;
+ local->rebalance.flags = datasync;
+
+ subvol = local->cached_subvol;
+
+ STACK_WIND_COOKIE(frame, dht_fsync_cbk, subvol, subvol, subvol->fops->fsync,
+ local->fd, local->rebalance.flags, local->xattr_req);
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (flush, frame, -1, op_errno, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(fsync, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
+ return 0;
}
-
-int
-dht_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, struct iatt *prebuf, struct iatt *postbuf,
- dict_t *xdata)
+/* TODO: for 'lk()' call, we need some other special error, may be ESTALE to
+ indicate that lock migration happened on the fd, so we can consider it as
+ phase 2 of migration */
+static int
+dht_lk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct gf_flock *flock, dict_t *xdata)
{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- int ret = -1;
- inode_t *inode = NULL;
- xlator_t *subvol = 0;
+ dht_local_t *local = NULL;
+ int ret = -1;
+ xlator_t *subvol = NULL;
- local = frame->local;
- prev = cookie;
+ local = frame->local;
- local->op_errno = op_errno;
- if (op_ret == -1 && !dht_inode_missing(op_errno)) {
- gf_msg_debug (this->name, 0,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
- goto out;
- }
+ if (!local) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
- if (local->call_cnt != 1) {
- if (local->stbuf.ia_blocks) {
- dht_iatt_merge (this, postbuf, &local->stbuf, NULL);
- dht_iatt_merge (this, prebuf, &local->prebuf, NULL);
- }
- goto out;
- }
+ if (local->call_cnt != 1)
+ goto out;
- local->op_errno = op_errno;
- dht_inode_ctx_get1 (this, inode, &subvol);
- if (!subvol) {
- local->rebalance.target_op_fn = dht_fsync2;
-
- /* Check if the rebalance phase1 is true */
- if (IS_DHT_MIGRATION_PHASE1 (postbuf)) {
- dht_iatt_merge (this, &local->stbuf, postbuf, NULL);
- dht_iatt_merge (this, &local->prebuf, prebuf, NULL);
-
- ret = dht_rebalance_in_progress_check (this, frame);
- }
-
- /* Check if the rebalance phase2 is true */
- if (IS_DHT_MIGRATION_PHASE2 (postbuf)) {
- ret = dht_rebalance_complete_check (this, frame);
- }
- if (!ret)
- return 0;
+ local->rebalance.target_op_fn = dht_lk2;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if (xdata)
+ local->rebalance.xdata = dict_ref(xdata);
+
+ if (op_errno == EREMOTE) {
+ dht_inode_ctx_get_mig_info(this, local->fd->inode, NULL, &subvol);
+ if (subvol && dht_fd_open_on_dst(this, local->fd, subvol)) {
+ dht_lk2(this, subvol, frame, 0);
+ return 0;
} else {
- dht_fsync2 (this, frame, 0);
+ ret = dht_rebalance_complete_check(this, frame);
+ if (!ret) {
return 0;
+ }
}
+ }
out:
- DHT_STRIP_PHASE1_FLAGS (postbuf);
- DHT_STRIP_PHASE1_FLAGS (prebuf);
- DHT_STACK_UNWIND (fsync, frame, op_ret, op_errno,
- prebuf, postbuf, xdata);
+ dht_lk_inode_unref(frame, op_ret);
+ DHT_STACK_UNWIND(lk, frame, op_ret, op_errno, flock, xdata);
- return 0;
+ return 0;
}
-int
-dht_fsync2 (xlator_t *this, call_frame_t *frame, int op_ret)
+static int
+dht_lk2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
- dht_local_t *local = NULL;
- xlator_t *subvol = NULL;
+ dht_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
- local = frame->local;
+ if ((frame == NULL) || (frame->local == NULL))
+ goto out;
- dht_inode_ctx_get1 (this, local->fd->inode, &subvol);
- if (!subvol)
- subvol = local->cached_subvol;
+ local = frame->local;
- local->call_cnt = 2; /* This is the second attempt */
+ op_errno = local->op_errno;
- STACK_WIND (frame, dht_fsync_cbk, subvol, subvol->fops->fsync,
- local->fd, local->rebalance.flags, NULL);
+ if (subvol == NULL)
+ goto out;
- return 0;
+ local->call_cnt = 2; /* This is the second attempt */
+
+ STACK_WIND(frame, dht_lk_cbk, subvol, subvol->fops->lk, local->fd,
+ local->rebalance.lock_cmd, &local->rebalance.flock,
+ local->xattr_req);
+
+ return 0;
+
+out:
+ DHT_STACK_UNWIND(lk, frame, -1, op_errno, NULL, NULL);
+ return 0;
}
int
-dht_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync,
- dict_t *xdata)
+dht_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int cmd,
+ struct gf_flock *flock, dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
+ xlator_t *lock_subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
+
+ local = dht_local_init(frame, NULL, fd, GF_FOP_LK);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->lock_type = flock->l_type;
+ lock_subvol = dht_get_lock_subvolume(this, flock, local);
+ if (!lock_subvol) {
+ gf_msg_debug(this->name, 0, "no lock subvolume for path=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ /*
+ local->cached_subvol = lock_subvol;
+ ret = dht_check_and_open_fd_on_subvol (this, frame);
+ if (ret)
+ goto err;
+ */
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
+
+ local->rebalance.flock = *flock;
+ local->rebalance.lock_cmd = cmd;
+
+ local->call_cnt = 1;
+
+ STACK_WIND(frame, dht_lk_cbk, lock_subvol, lock_subvol->fops->lk, fd, cmd,
+ flock, xdata);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(lk, frame, -1, op_errno, NULL, NULL);
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
+ return 0;
+}
- local = dht_local_init (frame, NULL, fd, GF_FOP_FSYNC);
- if (!local) {
- op_errno = ENOMEM;
+static int
+dht_lease_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct gf_lease *lease, dict_t *xdata)
+{
+ DHT_STACK_UNWIND(lease, frame, op_ret, op_errno, lease, xdata);
- goto err;
- }
+ return 0;
+}
- local->call_cnt = 1;
- local->rebalance.flags = datasync;
+int
+dht_lease(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct gf_lease *lease, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
- subvol = local->cached_subvol;
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
- STACK_WIND (frame, dht_fsync_cbk, subvol, subvol->fops->fsync,
- fd, datasync, xdata);
+ subvol = dht_subvol_get_cached(this, loc->inode);
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no cached subvolume for path=%s",
+ loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
- return 0;
+ /* TODO: for rebalance, we need to preserve the fop arguments */
+ STACK_WIND(frame, dht_lease_cbk, subvol, subvol->fops->lease, loc, lease,
+ xdata);
+
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(lease, frame, -1, op_errno, NULL, NULL);
- return 0;
+ return 0;
}
-
-/* TODO: for 'lk()' call, we need some other special error, may be ESTALE to
- indicate that lock migration happened on the fd, so we can consider it as
- phase 2 of migration */
-int
-dht_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct gf_flock *flock, dict_t *xdata)
+/* Symlinks are currently not migrated, so no need for any check here */
+static int
+dht_readlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, const char *path, struct iatt *stbuf,
+ dict_t *xdata)
{
- DHT_STACK_UNWIND (lk, frame, op_ret, op_errno, flock, xdata);
+ dht_local_t *local = NULL;
- return 0;
-}
+ local = frame->local;
+ if (op_ret == -1)
+ goto err;
+
+ if (!local) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ }
+err:
+ DHT_STRIP_PHASE1_FLAGS(stbuf);
+ DHT_STACK_UNWIND(readlink, frame, op_ret, op_errno, path, stbuf, xdata);
+
+ return 0;
+}
int
-dht_lk (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int cmd, struct gf_flock *flock, dict_t *xdata)
+dht_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+ dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+ VALIDATE_OR_GOTO(loc->inode, err);
+ VALIDATE_OR_GOTO(loc->path, err);
+
+ local = dht_local_init(frame, loc, NULL, GF_FOP_READLINK);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no cached subvolume for path=%s",
+ loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ STACK_WIND(frame, dht_readlink_cbk, subvol, subvol->fops->readlink, loc,
+ size, xdata);
+
+ return 0;
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(readlink, frame, -1, op_errno, NULL, NULL, NULL);
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
- gf_msg_debug (this->name, 0,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+ return 0;
+}
- /* TODO: for rebalance, we need to preserve the fop arguments */
- STACK_WIND (frame, dht_lk_cbk, subvol, subvol->fops->lk, fd,
- cmd, flock, xdata);
+/* Get both DHT_IATT_IN_XDATA_KEY and DHT_MODE_IN_XDATA_KEY
+ * Use DHT_MODE_IN_XDATA_KEY if available, else fall back to
+ * DHT_IATT_IN_XDATA_KEY
+ * This will return a dummy iatt with only the mode and type set
+ */
+static int
+dht_read_iatt_from_xdata(dict_t *xdata, struct iatt *stbuf)
+{
+ int ret = -1;
+ int32_t mode = 0;
- return 0;
+ ret = dict_get_int32(xdata, DHT_MODE_IN_XDATA_KEY, &mode);
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (lk, frame, -1, op_errno, NULL, NULL);
+ if (ret) {
+ ret = dict_get_bin(xdata, DHT_IATT_IN_XDATA_KEY, (void **)&stbuf);
+ } else {
+ stbuf->ia_prot = ia_prot_from_st_mode(mode);
+ stbuf->ia_type = ia_type_from_st_mode(mode);
+ }
- return 0;
+ return ret;
}
-/* Symlinks are currently not migrated, so no need for any check here */
int
-dht_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, const char *path,
- struct iatt *stbuf, dict_t *xdata)
+dht_common_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
+ call_frame_t *call_frame = NULL;
+ xlator_t *prev = NULL;
+ xlator_t *src_subvol = NULL;
+ xlator_t *dst_subvol = NULL;
+ struct iatt stbuf = {
+ 0,
+ };
+ int ret = -1;
+ inode_t *inode = NULL;
+
+ local = frame->local;
+ call_frame = cookie;
+ prev = call_frame->this;
+
+ local->op_errno = op_errno;
+
+ if ((op_ret == -1) && !dht_inode_missing(op_errno)) {
+ gf_msg_debug(this->name, op_errno, "subvolume %s returned -1.",
+ prev->name);
+ goto out;
+ }
+
+ if (local->call_cnt != 1)
+ goto out;
+
+ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto out;
+ return 0;
+ }
+
+ ret = dht_read_iatt_from_xdata(xdata, &stbuf);
- local = frame->local;
- if (op_ret == -1)
- goto err;
+ if ((!op_ret) && (ret)) {
+ /* This is a potential problem and can cause corruption
+ * with sharding.
+ * Oh well. We tried.
+ */
+ goto out;
+ }
- if (!local) {
- op_ret = -1;
- op_errno = EINVAL;
+ local->op_ret = op_ret;
+ local->rebalance.target_op_fn = dht_common_xattrop2;
+ if (xdata)
+ local->rebalance.xdata = dict_ref(xdata);
+
+ if (dict)
+ local->rebalance.dict = dict_ref(dict);
+
+ /* Phase 2 of migration */
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2(&stbuf)) {
+ ret = dht_rebalance_complete_check(this, frame);
+ if (!ret)
+ return 0;
+ }
+
+ /* Check if the rebalance phase1 is true */
+ if (IS_DHT_MIGRATION_PHASE1(&stbuf)) {
+ inode = local->loc.inode ? local->loc.inode : local->fd->inode;
+ dht_inode_ctx_get_mig_info(this, inode, &src_subvol, &dst_subvol);
+
+ if (dht_mig_info_is_invalid(local->cached_subvol, src_subvol,
+ dst_subvol) ||
+ !dht_fd_open_on_dst(this, local->fd, dst_subvol)) {
+ ret = dht_rebalance_in_progress_check(this, frame);
+ if (!ret)
+ return 0;
+ } else {
+ dht_common_xattrop2(this, dst_subvol, frame, 0);
+ return 0;
}
+ }
-err:
- DHT_STRIP_PHASE1_FLAGS (stbuf);
- DHT_STACK_UNWIND (readlink, frame, op_ret, op_errno, path, stbuf, xdata);
+out:
+ if (local->fop == GF_FOP_XATTROP) {
+ DHT_STACK_UNWIND(xattrop, frame, op_ret, op_errno, dict, xdata);
+ } else {
+ DHT_STACK_UNWIND(fxattrop, frame, op_ret, op_errno, dict, xdata);
+ }
- return 0;
+ return 0;
}
-
-int
-dht_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
- dict_t *xdata)
+static int
+dht_common_xattrop2(xlator_t *this, xlator_t *subvol, call_frame_t *frame,
+ int ret)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
- local = dht_local_init (frame, loc, NULL, GF_FOP_READLINK);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
+ dht_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
+
+ if ((frame == NULL) || (frame->local == NULL))
+ goto out;
+
+ local = frame->local;
+ op_errno = local->op_errno;
+
+ if (we_are_not_migrating(ret)) {
+ /* This dht xlator is not migrating the file. Unwind and
+ * pass on the original mode bits so the higher DHT layer
+ * can handle this.
+ */
+ if (local->fop == GF_FOP_XATTROP) {
+ DHT_STACK_UNWIND(xattrop, frame, local->op_ret, op_errno,
+ local->rebalance.dict, local->rebalance.xdata);
+ } else {
+ DHT_STACK_UNWIND(fxattrop, frame, local->op_ret, op_errno,
+ local->rebalance.dict, local->rebalance.xdata);
}
- subvol = local->cached_subvol;
- if (!subvol) {
- gf_msg_debug (this->name, 0,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ return 0;
+ }
- STACK_WIND (frame, dht_readlink_cbk,
- subvol, subvol->fops->readlink,
- loc, size, xdata);
+ if (subvol == NULL)
+ goto out;
- return 0;
+ local->call_cnt = 2; /* This is the second attempt */
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (readlink, frame, -1, op_errno, NULL, NULL, NULL);
+ if (local->fop == GF_FOP_XATTROP) {
+ STACK_WIND(frame, dht_common_xattrop_cbk, subvol, subvol->fops->xattrop,
+ &local->loc, local->rebalance.flags, local->rebalance.xattr,
+ local->xattr_req);
+ } else {
+ STACK_WIND(frame, dht_common_xattrop_cbk, subvol,
+ subvol->fops->fxattrop, local->fd, local->rebalance.flags,
+ local->rebalance.xattr, local->xattr_req);
+ }
- return 0;
-}
+ return 0;
-/* Currently no translators on top of 'distribute' will be using
- * below fops, hence not implementing 'migration' related checks
- */
+out:
-int
-dht_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+ /* If local is unavailable we could be unwinding the wrong
+ * function here */
+
+ if (local && (local->fop == GF_FOP_XATTROP)) {
+ DHT_STACK_UNWIND(xattrop, frame, -1, op_errno, NULL, NULL);
+ } else {
+ DHT_STACK_UNWIND(fxattrop, frame, -1, op_errno, NULL, NULL);
+ }
+ return 0;
+}
+
+static int
+dht_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
- DHT_STACK_UNWIND (xattrop, frame, op_ret, op_errno, dict, xdata);
- return 0;
+ DHT_STACK_UNWIND(xattrop, frame, op_ret, op_errno, dict, xdata);
+ return 0;
}
+/* Set both DHT_IATT_IN_XDATA_KEY and DHT_MODE_IN_XDATA_KEY
+ * Use DHT_MODE_IN_XDATA_KEY if available. Else fall back to
+ * DHT_IATT_IN_XDATA_KEY
+ */
+static int
+dht_request_iatt_in_xdata(dict_t *xattr_req)
+{
+ int ret = -1;
+
+ ret = dict_set_int8(xattr_req, DHT_MODE_IN_XDATA_KEY, 1);
+ ret = dict_set_int8(xattr_req, DHT_IATT_IN_XDATA_KEY, 1);
+
+ /* At least one call succeeded */
+ return ret;
+}
int
-dht_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+dht_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
- local = dht_local_init (frame, loc, NULL, GF_FOP_XATTROP);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+ int ret = -1;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+ VALIDATE_OR_GOTO(loc->inode, err);
+
+ local = dht_local_init(frame, loc, NULL, GF_FOP_XATTROP);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no cached subvolume for gfid=%s",
+ uuid_utoa(loc->inode->gfid));
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ /* Todo : Handle dirs as well. At the moment the only xlator above dht
+ * that uses xattrop is sharding and that is only for files */
+
+ if (IA_ISDIR(loc->inode->ia_type)) {
+ STACK_WIND(frame, dht_xattrop_cbk, subvol, subvol->fops->xattrop, loc,
+ flags, dict, xdata);
+
+ } else {
+ local->xattr_req = xdata ? dict_ref(xdata) : dict_new();
+ local->call_cnt = 1;
- subvol = local->cached_subvol;
- if (!subvol) {
- gf_msg_debug (this->name, 0,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ local->rebalance.xattr = dict_ref(dict);
+ local->rebalance.flags = flags;
- local->call_cnt = 1;
+ ret = dht_request_iatt_in_xdata(local->xattr_req);
- STACK_WIND (frame,
- dht_xattrop_cbk,
- subvol, subvol->fops->xattrop,
- loc, flags, dict, xdata);
+ if (ret) {
+ gf_msg_debug(this->name, 0,
+ "Failed to set dictionary key %s file=%s",
+ DHT_IATT_IN_XDATA_KEY, loc->path);
+ }
- return 0;
+ STACK_WIND(frame, dht_common_xattrop_cbk, subvol, subvol->fops->xattrop,
+ loc, local->rebalance.flags, local->rebalance.xattr,
+ local->xattr_req);
+ }
+
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (xattrop, frame, -1, op_errno, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(xattrop, frame, -1, op_errno, NULL, NULL);
- return 0;
+ return 0;
}
-
-int
-dht_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+static int
+dht_fxattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
- DHT_STACK_UNWIND (fxattrop, frame, op_ret, op_errno, dict, xdata);
- return 0;
+ DHT_STACK_UNWIND(fxattrop, frame, op_ret, op_errno, dict, xdata);
+ return 0;
}
-
int
-dht_fxattrop (call_frame_t *frame, xlator_t *this,
- fd_t *fd, gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+dht_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
- gf_msg_debug (this->name, 0,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+ int ret = -1;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
+
+ subvol = dht_subvol_get_cached(this, fd->inode);
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local = dht_local_init(frame, NULL, fd, GF_FOP_FXATTROP);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ /* Todo : Handle dirs as well. At the moment the only xlator above dht
+ * that uses xattrop is sharding and that is only for files */
+
+ if (IA_ISDIR(fd->inode->ia_type)) {
+ STACK_WIND(frame, dht_fxattrop_cbk, subvol, subvol->fops->fxattrop, fd,
+ flags, dict, xdata);
+
+ } else {
+ local->xattr_req = xdata ? dict_ref(xdata) : dict_new();
+ local->call_cnt = 1;
+
+ local->rebalance.xattr = dict_ref(dict);
+ local->rebalance.flags = flags;
+
+ ret = dht_request_iatt_in_xdata(local->xattr_req);
+
+ if (ret) {
+ gf_msg_debug(this->name, 0, "Failed to set dictionary key %s fd=%p",
+ DHT_IATT_IN_XDATA_KEY, fd);
}
- STACK_WIND (frame,
- dht_fxattrop_cbk,
- subvol, subvol->fops->fxattrop,
- fd, flags, dict, xdata);
+ STACK_WIND(frame, dht_common_xattrop_cbk, subvol,
+ subvol->fops->fxattrop, fd, local->rebalance.flags,
+ local->rebalance.xattr, local->xattr_req);
+ }
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (fxattrop, frame, -1, op_errno, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(fxattrop, frame, -1, op_errno, NULL, NULL);
- return 0;
+ return 0;
}
+/* Currently no translators on top of 'distribute' will be using
+ * below fops, hence not implementing 'migration' related checks
+ */
-int
-dht_inodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
+static int
+dht_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- DHT_STACK_UNWIND (inodelk, frame, op_ret, op_errno, xdata);
- return 0;
+ dht_lk_inode_unref(frame, op_ret);
+ DHT_STACK_UNWIND(inodelk, frame, op_ret, op_errno, xdata);
+ return 0;
}
-
int32_t
-dht_inodelk (call_frame_t *frame, xlator_t *this, const char *volume,
- loc_t *loc, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
+dht_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc,
+ int32_t cmd, struct gf_flock *lock, dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
+ xlator_t *lock_subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+ VALIDATE_OR_GOTO(loc->inode, err);
- local = dht_local_init (frame, loc, NULL, GF_FOP_INODELK);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ local = dht_local_init(frame, loc, NULL, GF_FOP_INODELK);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- subvol = local->cached_subvol;
- if (!subvol) {
- gf_msg_debug (this->name, 0,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ local->lock_type = lock->l_type;
+ lock_subvol = dht_get_lock_subvolume(this, lock, local);
+ if (!lock_subvol) {
+ gf_msg_debug(this->name, 0, "no lock subvolume for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
- local->call_cnt = 1;
+ local->call_cnt = 1;
- STACK_WIND (frame,
- dht_inodelk_cbk,
- subvol, subvol->fops->inodelk,
- volume, loc, cmd, lock, xdata);
+ STACK_WIND(frame, dht_inodelk_cbk, lock_subvol, lock_subvol->fops->inodelk,
+ volume, loc, cmd, lock, xdata);
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (inodelk, frame, -1, op_errno, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(inodelk, frame, -1, op_errno, NULL);
- return 0;
+ return 0;
}
-
int
-dht_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+dht_finodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- DHT_STACK_UNWIND (finodelk, frame, op_ret, op_errno, xdata);
- return 0;
-}
+ dht_local_t *local = NULL;
+ int ret = 0;
+ GF_VALIDATE_OR_GOTO("dht", frame, out);
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, out);
-int
-dht_finodelk (call_frame_t *frame, xlator_t *this, const char *volume,
- fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
-{
- xlator_t *subvol = NULL;
- int op_errno = -1;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
- gf_msg_debug (this->name, 0,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+ local = frame->local;
+ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto out;
+ return 0;
+ }
- STACK_WIND (frame, dht_finodelk_cbk, subvol, subvol->fops->finodelk,
- volume, fd, cmd, lock, xdata);
+out:
+ dht_lk_inode_unref(frame, op_ret);
+ DHT_STACK_UNWIND(finodelk, frame, op_ret, op_errno, xdata);
- return 0;
+ return 0;
+}
+
+int
+dht_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+ int32_t cmd, struct gf_flock *lock, dict_t *xdata)
+{
+ xlator_t *lock_subvol = NULL;
+ dht_local_t *local = NULL;
+ int op_errno = -1;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
+
+ local = dht_local_init(frame, NULL, fd, GF_FOP_INODELK);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->call_cnt = 1;
+ local->lock_type = lock->l_type;
+
+ lock_subvol = dht_get_lock_subvolume(this, lock, local);
+ if (!lock_subvol) {
+ gf_msg_debug(this->name, 0, "no lock subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ /*
+ local->cached_subvol = lock_subvol;
+ ret = dht_check_and_open_fd_on_subvol (this, frame);
+ if (ret)
+ goto err;
+ */
+ local->rebalance.flock = *lock;
+ local->rebalance.lock_cmd = cmd;
+ local->key = gf_strdup(volume);
+
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
+
+ STACK_WIND(frame, dht_finodelk_cbk, lock_subvol,
+ lock_subvol->fops->finodelk, volume, fd, cmd, lock, xdata);
+
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (finodelk, frame, -1, op_errno, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(finodelk, frame, -1, op_errno, NULL);
- return 0;
+ return 0;
}
diff --git a/xlators/cluster/dht/src/dht-inode-write.c b/xlators/cluster/dht/src/dht-inode-write.c
index 2bfd80af3cd..2f23ce90fbd 100644
--- a/xlators/cluster/dht/src/dht-inode-write.c
+++ b/xlators/cluster/dht/src/dht-inode-write.c
@@ -8,1010 +8,1397 @@
cases as published by the Free Software Foundation.
*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "dht-common.h"
-int dht_writev2 (xlator_t *this, call_frame_t *frame, int ret);
-int dht_truncate2 (xlator_t *this, call_frame_t *frame, int ret);
-int dht_setattr2 (xlator_t *this, call_frame_t *frame, int ret);
-int dht_fallocate2(xlator_t *this, call_frame_t *frame, int op_ret);
-int dht_discard2(xlator_t *this, call_frame_t *frame, int op_ret);
-int dht_zerofill2(xlator_t *this, call_frame_t *frame, int op_ret);
+static int
+dht_writev2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret);
+static int
+dht_truncate2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret);
+static int
+dht_setattr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret);
+static int
+dht_fallocate2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret);
+static int
+dht_discard2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret);
+static int
+dht_zerofill2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret);
int
-dht_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+dht_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- int ret = -1;
- xlator_t *subvol = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
+ int ret = -1;
+ xlator_t *subvol1 = NULL;
+ xlator_t *subvol2 = NULL;
+
+ local = frame->local;
+ prev = cookie;
+
+ if (!local) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ /* writev fails with EBADF if dht has not yet opened the fd
+ * on the cached subvol. This could happen if the file was migrated
+ * and a lookup updated the cached subvol in the inode ctx.
+ * We only check once as this could be a valid bad fd error.
+ */
+
+ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto out;
+ return 0;
+ }
- if (op_ret == -1 && !dht_inode_missing(op_errno)) {
- goto out;
+ if (op_ret == -1 && !dht_inode_missing(op_errno)) {
+ local->op_errno = op_errno;
+ local->op_ret = -1;
+ gf_msg_debug(this->name, 0, "subvolume %s returned -1 (%s)", prev->name,
+ strerror(op_errno));
+ goto out;
+ }
+
+ if (local->call_cnt != 1) {
+ /* preserve the modes of source */
+ if (local->stbuf.ia_blocks) {
+ dht_iatt_merge(this, postbuf, &local->stbuf);
+ dht_iatt_merge(this, prebuf, &local->prebuf);
}
+ goto out;
+ }
- local = frame->local;
- if (!local) {
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
+ local->rebalance.target_op_fn = dht_writev2;
- if (local->call_cnt != 1) {
- /* preserve the modes of source */
- if (local->stbuf.ia_blocks) {
- dht_iatt_merge (this, postbuf, &local->stbuf, NULL);
- dht_iatt_merge (this, prebuf, &local->prebuf, NULL);
- }
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ /* We might need to pass the stbuf information to the higher DHT
+ * layer for appropriate handling.
+ */
+
+ dht_set_local_rebalance(this, local, NULL, prebuf, postbuf, xdata);
+
+ /* Phase 2 of migration */
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2(postbuf)) {
+ ret = dht_rebalance_complete_check(this, frame);
+ if (!ret)
+ return 0;
+ }
+
+ /* Check if the rebalance phase1 is true */
+ if (IS_DHT_MIGRATION_PHASE1(postbuf)) {
+ if (!local->xattr_req) {
+ local->xattr_req = dict_new();
+ if (!local->xattr_req) {
+ gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, ENOMEM,
+ "insufficient memory");
+ local->op_errno = ENOMEM;
+ local->op_ret = -1;
goto out;
+ }
}
- local->rebalance.target_op_fn = dht_writev2;
-
- local->op_errno = op_errno;
- /* Phase 2 of migration */
- if (IS_DHT_MIGRATION_PHASE2 (postbuf)) {
- ret = dht_rebalance_complete_check (this, frame);
- if (!ret)
- return 0;
+ ret = dict_set_uint32(local->xattr_req, GF_PROTECT_FROM_EXTERNAL_WRITES,
+ 1);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_DICT_SET_FAILED, 0,
+ "Failed to set key %s in dictionary",
+ GF_PROTECT_FROM_EXTERNAL_WRITES);
+ local->op_errno = ENOMEM;
+ local->op_ret = -1;
+ goto out;
}
- /* Check if the rebalance phase1 is true */
- if (IS_DHT_MIGRATION_PHASE1 (postbuf)) {
- dht_iatt_merge (this, &local->stbuf, postbuf, NULL);
- dht_iatt_merge (this, &local->prebuf, prebuf, NULL);
+ dht_iatt_merge(this, &local->stbuf, postbuf);
+ dht_iatt_merge(this, &local->prebuf, prebuf);
- ret = dht_inode_ctx_get1 (this, local->fd->inode, &subvol);
- if (subvol) {
- dht_writev2 (this, frame, 0);
- return 0;
- }
- ret = dht_rebalance_in_progress_check (this, frame);
- if (!ret)
- return 0;
+ ret = dht_inode_ctx_get_mig_info(this, local->fd->inode, &subvol1,
+ &subvol2);
+ if (!dht_mig_info_is_invalid(local->cached_subvol, subvol1, subvol2)) {
+ if (dht_fd_open_on_dst(this, local->fd, subvol2)) {
+ dht_writev2(this, subvol2, frame, 0);
+ return 0;
+ }
}
+ ret = dht_rebalance_in_progress_check(this, frame);
+ if (!ret)
+ return 0;
+ }
out:
- DHT_STRIP_PHASE1_FLAGS (postbuf);
- DHT_STRIP_PHASE1_FLAGS (prebuf);
+ DHT_STRIP_PHASE1_FLAGS(postbuf);
+ DHT_STRIP_PHASE1_FLAGS(prebuf);
- DHT_STACK_UNWIND (writev, frame, op_ret, op_errno, prebuf, postbuf,
- xdata);
+ DHT_STACK_UNWIND(writev, frame, op_ret, op_errno, prebuf, postbuf, xdata);
- return 0;
+ return 0;
}
-int
-dht_writev2 (xlator_t *this, call_frame_t *frame, int op_ret)
+static int
+dht_writev2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
- dht_local_t *local = NULL;
- xlator_t *subvol = NULL;
+ dht_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
+
+ if ((frame == NULL) || (frame->local == NULL))
+ goto out;
+
+ local = frame->local;
+ op_errno = local->op_errno;
+
+ if (we_are_not_migrating(ret)) {
+ /* This dht xlator is not migrating the file. Unwind and
+ * pass on the original mode bits so the higher DHT layer
+ * can handle this.
+ */
+ DHT_STACK_UNWIND(writev, frame, local->op_ret, local->op_errno,
+ &local->rebalance.prebuf, &local->rebalance.postbuf,
+ local->rebalance.xdata);
+ return 0;
+ }
- local = frame->local;
+ if (subvol == NULL)
+ goto out;
- dht_inode_ctx_get1 (this, local->fd->inode, &subvol);
+ local->call_cnt = 2; /* This is the second attempt */
- if (!subvol)
- subvol = local->cached_subvol;
+ STACK_WIND_COOKIE(frame, dht_writev_cbk, subvol, subvol,
+ subvol->fops->writev, local->fd, local->rebalance.vector,
+ local->rebalance.count, local->rebalance.offset,
+ local->rebalance.flags, local->rebalance.iobref,
+ local->xattr_req);
- local->call_cnt = 2; /* This is the second attempt */
+ return 0;
- STACK_WIND (frame, dht_writev_cbk,
- subvol, subvol->fops->writev,
- local->fd, local->rebalance.vector, local->rebalance.count,
- local->rebalance.offset, local->rebalance.flags,
- local->rebalance.iobref, NULL);
+out:
+ DHT_STACK_UNWIND(writev, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
+ return 0;
}
int
-dht_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iovec *vector, int count, off_t off, uint32_t flags,
- struct iobref *iobref, dict_t *xdata)
+dht_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
+ int count, off_t off, uint32_t flags, struct iobref *iobref,
+ dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
- local = dht_local_init (frame, NULL, fd, GF_FOP_WRITE);
- if (!local) {
-
- op_errno = ENOMEM;
- goto err;
- }
-
- subvol = local->cached_subvol;
- if (!subvol) {
- gf_msg_debug (this->name, 0,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
-
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
+
+ local = dht_local_init(frame, NULL, fd, GF_FOP_WRITE);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
+
+ local->rebalance.vector = iov_dup(vector, count);
+ local->rebalance.offset = off;
+ local->rebalance.count = count;
+ local->rebalance.flags = flags;
+ local->rebalance.iobref = iobref_ref(iobref);
+ local->call_cnt = 1;
+
+ STACK_WIND_COOKIE(frame, dht_writev_cbk, subvol, subvol,
+ subvol->fops->writev, fd, local->rebalance.vector,
+ local->rebalance.count, local->rebalance.offset,
+ local->rebalance.flags, local->rebalance.iobref,
+ local->xattr_req);
+
+ return 0;
- local->rebalance.vector = iov_dup (vector, count);
- local->rebalance.offset = off;
- local->rebalance.count = count;
- local->rebalance.flags = flags;
- local->rebalance.iobref = iobref_ref (iobref);
- local->call_cnt = 1;
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(writev, frame, -1, op_errno, NULL, NULL, NULL);
- STACK_WIND (frame, dht_writev_cbk,
- subvol, subvol->fops->writev,
- fd, vector, count, off, flags, iobref, xdata);
+ return 0;
+}
+int
+dht_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
+ int ret = -1;
+ xlator_t *src_subvol = NULL;
+ xlator_t *dst_subvol = NULL;
+ inode_t *inode = NULL;
+
+ GF_VALIDATE_OR_GOTO("dht", frame, err);
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO("dht", cookie, out);
+
+ local = frame->local;
+ prev = cookie;
+
+ /* Needs to be checked only for ftruncate.
+ * ftruncate fails with EBADF/EINVAL if dht has not yet opened the fd
+ * on the cached subvol. This could happen if the file was migrated
+ * and a lookup updated the cached subvol in the inode ctx.
+ * We only check once as this could actually be a valid error.
+ */
+
+ if ((local->fop == GF_FOP_FTRUNCATE) &&
+ dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto out;
return 0;
+ }
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL);
+ if ((op_ret == -1) && !dht_inode_missing(op_errno)) {
+ local->op_errno = op_errno;
+ local->op_ret = -1;
+ gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
+ prev->name);
- return 0;
-}
+ goto out;
+ }
+ if (local->call_cnt != 1) {
+ if (local->stbuf.ia_blocks) {
+ dht_iatt_merge(this, postbuf, &local->stbuf);
+ dht_iatt_merge(this, prebuf, &local->prebuf);
+ }
+ goto out;
+ }
+ local->rebalance.target_op_fn = dht_truncate2;
-int
-dht_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
-{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- int ret = -1;
- xlator_t *subvol = NULL;
- inode_t *inode = NULL;
-
- GF_VALIDATE_OR_GOTO ("dht", frame, err);
- GF_VALIDATE_OR_GOTO ("dht", this, out);
- GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
- GF_VALIDATE_OR_GOTO ("dht", cookie, out);
-
- local = frame->local;
- prev = cookie;
-
- if ((op_ret == -1) && !dht_inode_missing(op_errno)) {
- local->op_errno = op_errno;
- local->op_ret = -1;
- gf_msg_debug (this->name, 0,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
- goto out;
- }
+ /* We might need to pass the stbuf information to the higher DHT
+ * layer for appropriate handling.
+ */
- if (local->call_cnt != 1) {
- if (local->stbuf.ia_blocks) {
- dht_iatt_merge (this, postbuf, &local->stbuf, NULL);
- dht_iatt_merge (this, prebuf, &local->prebuf, NULL);
- }
- goto out;
- }
+ dht_set_local_rebalance(this, local, NULL, prebuf, postbuf, xdata);
- local->rebalance.target_op_fn = dht_truncate2;
+ /* Phase 2 of migration */
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2(postbuf)) {
+ ret = dht_rebalance_complete_check(this, frame);
+ if (!ret)
+ return 0;
+ }
- local->op_errno = op_errno;
- /* Phase 2 of migration */
- if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (postbuf)) {
- ret = dht_rebalance_complete_check (this, frame);
- if (!ret)
- return 0;
- }
+ /* Check if the rebalance phase1 is true */
+ if (IS_DHT_MIGRATION_PHASE1(postbuf)) {
+ dht_iatt_merge(this, &local->stbuf, postbuf);
+ dht_iatt_merge(this, &local->prebuf, prebuf);
- /* Check if the rebalance phase1 is true */
- if (IS_DHT_MIGRATION_PHASE1 (postbuf)) {
- dht_iatt_merge (this, &local->stbuf, postbuf, NULL);
- dht_iatt_merge (this, &local->prebuf, prebuf, NULL);
- inode = (local->fd) ? local->fd->inode : local->loc.inode;
- dht_inode_ctx_get1 (this, inode, &subvol);
- if (subvol) {
- dht_truncate2 (this, frame, 0);
- return 0;
- }
- ret = dht_rebalance_in_progress_check (this, frame);
- if (!ret)
- return 0;
+ inode = (local->fd) ? local->fd->inode : local->loc.inode;
+
+ dht_inode_ctx_get_mig_info(this, inode, &src_subvol, &dst_subvol);
+ if (!dht_mig_info_is_invalid(local->cached_subvol, src_subvol,
+ dst_subvol)) {
+ if ((!local->fd) ||
+ ((local->fd) &&
+ dht_fd_open_on_dst(this, local->fd, dst_subvol))) {
+ dht_truncate2(this, dst_subvol, frame, 0);
+ return 0;
+ }
}
+ ret = dht_rebalance_in_progress_check(this, frame);
+ if (!ret)
+ return 0;
+ }
out:
- DHT_STRIP_PHASE1_FLAGS (postbuf);
- DHT_STRIP_PHASE1_FLAGS (prebuf);
- DHT_STACK_UNWIND (truncate, frame, op_ret, op_errno,
- prebuf, postbuf, xdata);
+ DHT_STRIP_PHASE1_FLAGS(postbuf);
+ DHT_STRIP_PHASE1_FLAGS(prebuf);
+
+ DHT_STACK_UNWIND(truncate, frame, op_ret, op_errno, prebuf, postbuf, xdata);
err:
- return 0;
+ return 0;
}
-
-int
-dht_truncate2 (xlator_t *this, call_frame_t *frame, int op_ret)
+static int
+dht_truncate2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
- dht_local_t *local = NULL;
- xlator_t *subvol = NULL;
- inode_t *inode = NULL;
+ dht_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
- local = frame->local;
+ if (!frame || !frame->local)
+ goto out;
- inode = local->fd ? local->fd->inode : local->loc.inode;
+ local = frame->local;
+ op_errno = local->op_errno;
- dht_inode_ctx_get1 (this, inode, &subvol);
- if (!subvol)
- subvol = local->cached_subvol;
+ /* This dht xlator is not migrating the file */
+ if (we_are_not_migrating(ret)) {
+ DHT_STACK_UNWIND(truncate, frame, local->op_ret, local->op_errno,
+ &local->rebalance.prebuf, &local->rebalance.postbuf,
+ local->rebalance.xdata);
+ return 0;
+ }
- local->call_cnt = 2; /* This is the second attempt */
+ if (subvol == NULL)
+ goto out;
- if (local->fop == GF_FOP_TRUNCATE) {
- STACK_WIND (frame, dht_truncate_cbk, subvol,
- subvol->fops->truncate, &local->loc,
- local->rebalance.offset, NULL);
- } else {
- STACK_WIND (frame, dht_truncate_cbk, subvol,
- subvol->fops->ftruncate, local->fd,
- local->rebalance.offset, NULL);
- }
+ local->call_cnt = 2; /* This is the second attempt */
- return 0;
+ if (local->fop == GF_FOP_TRUNCATE) {
+ STACK_WIND_COOKIE(frame, dht_truncate_cbk, subvol, subvol,
+ subvol->fops->truncate, &local->loc,
+ local->rebalance.offset, local->xattr_req);
+ } else {
+ STACK_WIND_COOKIE(frame, dht_truncate_cbk, subvol, subvol,
+ subvol->fops->ftruncate, local->fd,
+ local->rebalance.offset, local->xattr_req);
+ }
+
+ return 0;
+
+out:
+ DHT_STACK_UNWIND(truncate, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
}
int
-dht_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
- dict_t *xdata)
+dht_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
- local = dht_local_init (frame, loc, NULL, GF_FOP_TRUNCATE);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- local->rebalance.offset = offset;
- local->call_cnt = 1;
- subvol = local->cached_subvol;
- if (!subvol) {
- gf_msg_debug (this->name, 0,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
-
- STACK_WIND (frame, dht_truncate_cbk,
- subvol, subvol->fops->truncate,
- loc, offset, xdata);
-
- return 0;
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+ VALIDATE_OR_GOTO(loc->inode, err);
+
+ local = dht_local_init(frame, loc, NULL, GF_FOP_TRUNCATE);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->rebalance.offset = offset;
+ local->call_cnt = 1;
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no cached subvolume for gfid=%s",
+ uuid_utoa(loc->inode->gfid));
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
+
+ STACK_WIND_COOKIE(frame, dht_truncate_cbk, subvol, subvol,
+ subvol->fops->truncate, loc, offset, xdata);
+
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(truncate, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
+ return 0;
}
int
-dht_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
- dict_t *xdata)
+dht_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
- local = dht_local_init (frame, NULL, fd, GF_FOP_FTRUNCATE);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- local->rebalance.offset = offset;
- local->call_cnt = 1;
- subvol = local->cached_subvol;
- if (!subvol) {
- gf_msg_debug (this->name, 0,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
-
- STACK_WIND (frame, dht_truncate_cbk,
- subvol, subvol->fops->ftruncate,
- fd, offset, xdata);
-
- return 0;
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
+
+ local = dht_local_init(frame, NULL, fd, GF_FOP_FTRUNCATE);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->rebalance.offset = offset;
+ local->call_cnt = 1;
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
+
+ STACK_WIND_COOKIE(frame, dht_truncate_cbk, subvol, subvol,
+ subvol->fops->ftruncate, fd, local->rebalance.offset,
+ local->xattr_req);
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(ftruncate, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
+ return 0;
}
-
int
-dht_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+dht_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- int ret = -1;
- xlator_t *subvol = NULL;
-
- GF_VALIDATE_OR_GOTO ("dht", frame, err);
- GF_VALIDATE_OR_GOTO ("dht", this, out);
- GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
- GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
+ int ret = -1;
+ xlator_t *src_subvol = NULL;
+ xlator_t *dst_subvol = NULL;
+
+ GF_VALIDATE_OR_GOTO("dht", frame, err);
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO("dht", cookie, out);
+
+ local = frame->local;
+ prev = cookie;
+
+ /* fallocate fails with EBADF if dht has not yet opened the fd
+ * on the cached subvol. This could happen if the file was migrated
+ * and a lookup updated the cached subvol in the inode ctx.
+ * We only check once as this could actually be a valid error.
+ */
+
+ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto out;
+ return 0;
+ }
- local = frame->local;
- prev = cookie;
+ if ((op_ret == -1) && !dht_inode_missing(op_errno)) {
+ local->op_errno = op_errno;
+ local->op_ret = -1;
+ gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
+ prev->name);
- if ((op_ret == -1) && !dht_inode_missing(op_errno)) {
- local->op_errno = op_errno;
- local->op_ret = -1;
- gf_msg_debug (this->name, 0,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
+ goto out;
+ }
- goto out;
+ if (local->call_cnt != 1) {
+ if (local->stbuf.ia_blocks) {
+ dht_iatt_merge(this, postbuf, &local->stbuf);
+ dht_iatt_merge(this, prebuf, &local->prebuf);
}
+ goto out;
+ }
- if (local->call_cnt != 1) {
- if (local->stbuf.ia_blocks) {
- dht_iatt_merge (this, postbuf, &local->stbuf, NULL);
- dht_iatt_merge (this, prebuf, &local->prebuf, NULL);
- }
- goto out;
- }
- local->rebalance.target_op_fn = dht_fallocate2;
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ local->rebalance.target_op_fn = dht_fallocate2;
- /* Phase 2 of migration */
- if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (postbuf)) {
- ret = dht_rebalance_complete_check (this, frame);
- if (!ret)
- return 0;
- }
+ dht_set_local_rebalance(this, local, NULL, prebuf, postbuf, xdata);
- /* Check if the rebalance phase1 is true */
- if (IS_DHT_MIGRATION_PHASE1 (postbuf)) {
- dht_iatt_merge (this, &local->stbuf, postbuf, NULL);
- dht_iatt_merge (this, &local->prebuf, prebuf, NULL);
- dht_inode_ctx_get1 (this, local->fd->inode, &subvol);
- if (subvol) {
- dht_fallocate2 (this, frame, 0);
- return 0;
- }
- ret = dht_rebalance_in_progress_check (this, frame);
- if (!ret)
- return 0;
+ /* Phase 2 of migration */
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2(postbuf)) {
+ ret = dht_rebalance_complete_check(this, frame);
+ if (!ret)
+ return 0;
+ }
+
+ /* Check if the rebalance phase1 is true */
+ if (IS_DHT_MIGRATION_PHASE1(postbuf)) {
+ dht_iatt_merge(this, &local->stbuf, postbuf);
+ dht_iatt_merge(this, &local->prebuf, prebuf);
+
+ dht_inode_ctx_get_mig_info(this, local->fd->inode, &src_subvol,
+ &dst_subvol);
+ if (!dht_mig_info_is_invalid(local->cached_subvol, src_subvol,
+ dst_subvol)) {
+ if (dht_fd_open_on_dst(this, local->fd, dst_subvol)) {
+ dht_fallocate2(this, dst_subvol, frame, 0);
+ return 0;
+ }
}
+ ret = dht_rebalance_in_progress_check(this, frame);
+ if (!ret)
+ return 0;
+ }
out:
- DHT_STRIP_PHASE1_FLAGS (postbuf);
- DHT_STRIP_PHASE1_FLAGS (prebuf);
- DHT_STACK_UNWIND (fallocate, frame, op_ret, op_errno,
- prebuf, postbuf, xdata);
+ DHT_STRIP_PHASE1_FLAGS(postbuf);
+ DHT_STRIP_PHASE1_FLAGS(prebuf);
+
+ DHT_STACK_UNWIND(fallocate, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
err:
- return 0;
+ return 0;
}
-int
-dht_fallocate2(xlator_t *this, call_frame_t *frame, int op_ret)
+static int
+dht_fallocate2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
- dht_local_t *local = NULL;
- xlator_t *subvol = NULL;
-
- local = frame->local;
+ dht_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
+
+ if (!frame || !frame->local)
+ goto out;
+
+ local = frame->local;
+ op_errno = local->op_errno;
+
+ if (we_are_not_migrating(ret)) {
+ /* This dht xlator is not migrating the file. Unwind and
+ * pass on the original mode bits so the higher DHT layer
+ * can handle this.
+ */
+ DHT_STACK_UNWIND(fallocate, frame, local->op_ret, local->op_errno,
+ &local->rebalance.prebuf, &local->rebalance.postbuf,
+ local->rebalance.xdata);
+ return 0;
+ }
- dht_inode_ctx_get1 (this, local->fd->inode, &subvol);
+ if (subvol == NULL)
+ goto out;
- if (!subvol)
- subvol = local->cached_subvol;
+ local->call_cnt = 2; /* This is the second attempt */
- local->call_cnt = 2; /* This is the second attempt */
+ STACK_WIND_COOKIE(frame, dht_fallocate_cbk, subvol, subvol,
+ subvol->fops->fallocate, local->fd,
+ local->rebalance.flags, local->rebalance.offset,
+ local->rebalance.size, local->xattr_req);
- STACK_WIND(frame, dht_fallocate_cbk, subvol, subvol->fops->fallocate,
- local->fd, local->rebalance.flags, local->rebalance.offset,
- local->rebalance.size, NULL);
+ return 0;
- return 0;
+out:
+ DHT_STACK_UNWIND(fallocate, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
}
int
dht_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
- off_t offset, size_t len, dict_t *xdata)
+ off_t offset, size_t len, dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
- local = dht_local_init (frame, NULL, fd, GF_FOP_FALLOCATE);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- local->rebalance.flags = mode;
- local->rebalance.offset = offset;
- local->rebalance.size = len;
-
- local->call_cnt = 1;
- subvol = local->cached_subvol;
- if (!subvol) {
- gf_msg_debug (this->name, 0,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
-
- STACK_WIND (frame, dht_fallocate_cbk,
- subvol, subvol->fops->fallocate,
- fd, mode, offset, len, xdata);
-
- return 0;
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
+
+ local = dht_local_init(frame, NULL, fd, GF_FOP_FALLOCATE);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->rebalance.flags = mode;
+ local->rebalance.offset = offset;
+ local->rebalance.size = len;
+
+ local->call_cnt = 1;
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
+
+ STACK_WIND_COOKIE(frame, dht_fallocate_cbk, subvol, subvol,
+ subvol->fops->fallocate, fd, local->rebalance.flags,
+ local->rebalance.offset, local->rebalance.size,
+ local->xattr_req);
+
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (fallocate, frame, -1, op_errno, NULL, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(fallocate, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
+ return 0;
}
-
int
-dht_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+dht_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- int ret = -1;
- xlator_t *subvol = NULL;
-
- GF_VALIDATE_OR_GOTO ("dht", frame, err);
- GF_VALIDATE_OR_GOTO ("dht", this, out);
- GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
- GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
+ int ret = -1;
+ xlator_t *src_subvol = NULL;
+ xlator_t *dst_subvol = NULL;
+
+ GF_VALIDATE_OR_GOTO("dht", frame, err);
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO("dht", cookie, out);
+
+ local = frame->local;
+ prev = cookie;
+
+ /* discard fails with EBADF if dht has not yet opened the fd
+ * on the cached subvol. This could happen if the file was migrated
+ * and a lookup updated the cached subvol in the inode ctx.
+ * We only check once as this could actually be a valid error.
+ */
+ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto out;
+ return 0;
+ }
- local = frame->local;
- prev = cookie;
+ if ((op_ret == -1) && !dht_inode_missing(op_errno)) {
+ local->op_errno = op_errno;
+ local->op_ret = -1;
+ gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
+ prev->name);
- if ((op_ret == -1) && !dht_inode_missing(op_errno)) {
- local->op_errno = op_errno;
- local->op_ret = -1;
- gf_msg_debug (this->name, 0,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
+ goto out;
+ }
- goto out;
+ if (local->call_cnt != 1) {
+ if (local->stbuf.ia_blocks) {
+ dht_iatt_merge(this, postbuf, &local->stbuf);
+ dht_iatt_merge(this, prebuf, &local->prebuf);
}
+ goto out;
+ }
- if (local->call_cnt != 1) {
- if (local->stbuf.ia_blocks) {
- dht_iatt_merge (this, postbuf, &local->stbuf, NULL);
- dht_iatt_merge (this, prebuf, &local->prebuf, NULL);
- }
- goto out;
- }
- local->rebalance.target_op_fn = dht_discard2;
+ local->rebalance.target_op_fn = dht_discard2;
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
- /* Phase 2 of migration */
- if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (postbuf)) {
- ret = dht_rebalance_complete_check (this, frame);
- if (!ret)
- return 0;
- }
+ dht_set_local_rebalance(this, local, NULL, prebuf, postbuf, xdata);
- /* Check if the rebalance phase1 is true */
- if (IS_DHT_MIGRATION_PHASE1 (postbuf)) {
- dht_iatt_merge (this, &local->stbuf, postbuf, NULL);
- dht_iatt_merge (this, &local->prebuf, prebuf, NULL);
- dht_inode_ctx_get1 (this, local->fd->inode, &subvol);
- if (subvol) {
- dht_discard2 (this, frame, 0);
- return 0;
- }
- ret = dht_rebalance_in_progress_check (this, frame);
- if (!ret)
- return 0;
+ /* Phase 2 of migration */
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2(postbuf)) {
+ ret = dht_rebalance_complete_check(this, frame);
+ if (!ret)
+ return 0;
+ }
+
+ /* Check if the rebalance phase1 is true */
+ if (IS_DHT_MIGRATION_PHASE1(postbuf)) {
+ dht_iatt_merge(this, &local->stbuf, postbuf);
+ dht_iatt_merge(this, &local->prebuf, prebuf);
+
+ dht_inode_ctx_get_mig_info(this, local->fd->inode, &src_subvol,
+ &dst_subvol);
+ if (!dht_mig_info_is_invalid(local->cached_subvol, src_subvol,
+ dst_subvol)) {
+ if (dht_fd_open_on_dst(this, local->fd, dst_subvol)) {
+ dht_discard2(this, dst_subvol, frame, 0);
+ return 0;
+ }
}
+ ret = dht_rebalance_in_progress_check(this, frame);
+ if (!ret)
+ return 0;
+ }
out:
- DHT_STRIP_PHASE1_FLAGS (postbuf);
- DHT_STRIP_PHASE1_FLAGS (prebuf);
- DHT_STACK_UNWIND (discard, frame, op_ret, op_errno,
- prebuf, postbuf, xdata);
+ DHT_STRIP_PHASE1_FLAGS(postbuf);
+ DHT_STRIP_PHASE1_FLAGS(prebuf);
+
+ DHT_STACK_UNWIND(discard, frame, op_ret, op_errno, prebuf, postbuf, xdata);
err:
- return 0;
+ return 0;
}
-int
-dht_discard2(xlator_t *this, call_frame_t *frame, int op_ret)
+static int
+dht_discard2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
- dht_local_t *local = NULL;
- xlator_t *subvol = NULL;
-
- local = frame->local;
+ dht_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
+
+ if (!frame || !frame->local)
+ goto out;
+
+ local = frame->local;
+ op_errno = local->op_errno;
+
+ if (we_are_not_migrating(ret)) {
+ /* This dht xlator is not migrating the file. Unwind and
+ * pass on the original mode bits so the higher DHT layer
+ * can handle this.
+ */
+ DHT_STACK_UNWIND(discard, frame, local->op_ret, local->op_errno,
+ &local->rebalance.prebuf, &local->rebalance.postbuf,
+ local->rebalance.xdata);
+ return 0;
+ }
- dht_inode_ctx_get1 (this, local->fd->inode, &subvol);
+ if (subvol == NULL)
+ goto out;
- if (!subvol)
- subvol = local->cached_subvol;
+ local->call_cnt = 2; /* This is the second attempt */
- local->call_cnt = 2; /* This is the second attempt */
+ STACK_WIND_COOKIE(frame, dht_discard_cbk, subvol, subvol,
+ subvol->fops->discard, local->fd, local->rebalance.offset,
+ local->rebalance.size, local->xattr_req);
- STACK_WIND(frame, dht_discard_cbk, subvol, subvol->fops->discard,
- local->fd, local->rebalance.offset, local->rebalance.size,
- NULL);
+ return 0;
- return 0;
+out:
+ DHT_STACK_UNWIND(discard, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
}
int
dht_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
- size_t len, dict_t *xdata)
+ size_t len, dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
- local = dht_local_init (frame, NULL, fd, GF_FOP_DISCARD);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
- local->rebalance.offset = offset;
- local->rebalance.size = len;
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
- local->call_cnt = 1;
- subvol = local->cached_subvol;
- if (!subvol) {
- gf_msg_debug (this->name, 0,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+ local = dht_local_init(frame, NULL, fd, GF_FOP_DISCARD);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- STACK_WIND (frame, dht_discard_cbk, subvol, subvol->fops->discard,
- fd, offset, len, xdata);
+ local->rebalance.offset = offset;
+ local->rebalance.size = len;
- return 0;
+ local->call_cnt = 1;
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
+
+ STACK_WIND_COOKIE(frame, dht_discard_cbk, subvol, subvol,
+ subvol->fops->discard, fd, local->rebalance.offset,
+ local->rebalance.size, local->xattr_req);
+
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (discard, frame, -1, op_errno, NULL, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(discard, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
+ return 0;
}
int
-dht_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+dht_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- int ret = -1;
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
+ int ret = -1;
+ xlator_t *subvol1 = NULL, *subvol2 = NULL;
+
+ GF_VALIDATE_OR_GOTO("dht", frame, err);
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO("dht", cookie, out);
+
+ local = frame->local;
+ prev = cookie;
+
+ /* zerofill fails with EBADF if dht has not yet opened the fd
+ * on the cached subvol. This could happen if the file was migrated
+ * and a lookup updated the cached subvol in the inode ctx.
+ * We only check once as this could actually be a valid error.
+ */
+ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto out;
+ return 0;
+ }
- GF_VALIDATE_OR_GOTO ("dht", frame, err);
- GF_VALIDATE_OR_GOTO ("dht", this, out);
- GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
- GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+ if ((op_ret == -1) && !dht_inode_missing(op_errno)) {
+ local->op_errno = op_errno;
+ local->op_ret = -1;
+ gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
+ prev->name);
+ goto out;
+ }
+
+ if (local->call_cnt != 1) {
+ if (local->stbuf.ia_blocks) {
+ dht_iatt_merge(this, postbuf, &local->stbuf);
+ dht_iatt_merge(this, prebuf, &local->prebuf);
+ }
+ goto out;
+ }
- local = frame->local;
- prev = cookie;
+ local->rebalance.target_op_fn = dht_zerofill2;
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
- if ((op_ret == -1) && !dht_inode_missing(op_errno)) {
- local->op_errno = op_errno;
- local->op_ret = -1;
- gf_msg_debug (this->name, 0,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
- goto out;
- }
+ dht_set_local_rebalance(this, local, NULL, prebuf, postbuf, xdata);
- if (local->call_cnt != 1) {
- if (local->stbuf.ia_blocks) {
- dht_iatt_merge (this, postbuf, &local->stbuf, NULL);
- dht_iatt_merge (this, prebuf, &local->prebuf, NULL);
- }
- goto out;
- }
- local->rebalance.target_op_fn = dht_zerofill2;
- /* Phase 2 of migration */
- if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (postbuf)) {
- ret = dht_rebalance_complete_check (this, frame);
- if (!ret)
- return 0;
+ /* Phase 2 of migration */
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2(postbuf)) {
+ ret = dht_rebalance_complete_check(this, frame);
+ if (!ret)
+ return 0;
+ }
+
+ /* Check if the rebalance phase1 is true */
+ if (IS_DHT_MIGRATION_PHASE1(postbuf)) {
+ dht_iatt_merge(this, &local->stbuf, postbuf);
+ dht_iatt_merge(this, &local->prebuf, prebuf);
+
+ ret = dht_inode_ctx_get_mig_info(this, local->fd->inode, &subvol1,
+ &subvol2);
+ if (!dht_mig_info_is_invalid(local->cached_subvol, subvol1, subvol2)) {
+ if (dht_fd_open_on_dst(this, local->fd, subvol2)) {
+ dht_zerofill2(this, subvol2, frame, 0);
+ return 0;
+ }
}
- /* Check if the rebalance phase1 is true */
- if (IS_DHT_MIGRATION_PHASE1 (postbuf)) {
- dht_iatt_merge (this, &local->stbuf, postbuf, NULL);
- dht_iatt_merge (this, &local->prebuf, prebuf, NULL);
- ret = fd_ctx_get (local->fd, this, NULL);
- if (!ret) {
- dht_zerofill2 (this, frame, 0);
- return 0;
- }
- ret = dht_rebalance_in_progress_check (this, frame);
- if (!ret)
- return 0;
- }
+ ret = dht_rebalance_in_progress_check(this, frame);
+ if (!ret)
+ return 0;
+ }
out:
- DHT_STRIP_PHASE1_FLAGS (postbuf);
- DHT_STRIP_PHASE1_FLAGS (prebuf);
- DHT_STACK_UNWIND (zerofill, frame, op_ret, op_errno,
- prebuf, postbuf, xdata);
+ DHT_STRIP_PHASE1_FLAGS(postbuf);
+ DHT_STRIP_PHASE1_FLAGS(prebuf);
+
+ DHT_STACK_UNWIND(zerofill, frame, op_ret, op_errno, prebuf, postbuf, xdata);
err:
- return 0;
+ return 0;
}
-int
-dht_zerofill2(xlator_t *this, call_frame_t *frame, int op_ret)
+static int
+dht_zerofill2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
- dht_local_t *local = NULL;
- xlator_t *subvol = NULL;
- uint64_t tmp_subvol = 0;
- int ret = -1;
-
- local = frame->local;
+ dht_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
- if (local->fd)
- ret = fd_ctx_get (local->fd, this, &tmp_subvol);
- if (!ret)
- subvol = (xlator_t *)(long)tmp_subvol;
+ if (!frame || !frame->local)
+ goto out;
- if (!subvol)
- subvol = local->cached_subvol;
+ local = frame->local;
- local->call_cnt = 2; /* This is the second attempt */
+ op_errno = local->op_errno;
- STACK_WIND(frame, dht_zerofill_cbk, subvol, subvol->fops->zerofill,
- local->fd, local->rebalance.offset, local->rebalance.size,
- NULL);
+ if (we_are_not_migrating(ret)) {
+ /* This dht xlator is not migrating the file. Unwind and
+ * pass on the original mode bits so the higher DHT layer
+ * can handle this.
+ */
+ DHT_STACK_UNWIND(zerofill, frame, local->op_ret, local->op_errno,
+ &local->rebalance.prebuf, &local->rebalance.postbuf,
+ local->rebalance.xdata);
return 0;
+ }
+
+ if (subvol == NULL)
+ goto out;
+
+ local->call_cnt = 2; /* This is the second attempt */
+
+ STACK_WIND_COOKIE(frame, dht_zerofill_cbk, subvol, subvol,
+ subvol->fops->zerofill, local->fd,
+ local->rebalance.offset, local->rebalance.size,
+ local->xattr_req);
+
+ return 0;
+
+out:
+
+ DHT_STACK_UNWIND(zerofill, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
}
int
dht_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
- off_t len, dict_t *xdata)
+ off_t len, dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
- local = dht_local_init (frame, NULL, fd, GF_FOP_ZEROFILL);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
- local->rebalance.offset = offset;
- local->rebalance.size = len;
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
- local->call_cnt = 1;
- subvol = local->cached_subvol;
- if (!subvol) {
- gf_msg_debug (this->name, 0,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+ local = dht_local_init(frame, NULL, fd, GF_FOP_ZEROFILL);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- STACK_WIND (frame, dht_zerofill_cbk, subvol, subvol->fops->zerofill,
- fd, offset, len, xdata);
+ local->rebalance.offset = offset;
+ local->rebalance.size = len;
- return 0;
+ local->call_cnt = 1;
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (zerofill, frame, -1, op_errno, NULL, NULL, NULL);
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
- return 0;
-}
+ STACK_WIND_COOKIE(frame, dht_zerofill_cbk, subvol, subvol,
+ subvol->fops->zerofill, fd, local->rebalance.offset,
+ local->rebalance.size, local->xattr_req);
+ return 0;
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(zerofill, frame, -1, op_errno, NULL, NULL, NULL);
+
+ return 0;
+}
/* handle cases of migration here for 'setattr()' calls */
int
-dht_file_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+dht_file_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- int ret = -1;
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
+ int ret = -1;
- local = frame->local;
- prev = cookie;
+ local = frame->local;
+ prev = cookie;
- local->op_errno = op_errno;
- if ((op_ret == -1) && !dht_inode_missing(op_errno)) {
- gf_msg_debug (this->name, 0,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
- goto out;
- }
+ local->op_errno = op_errno;
- if (local->call_cnt != 1)
- goto out;
+ if ((local->fop == GF_FOP_FSETATTR) &&
+ dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto out;
+ return 0;
+ }
- local->rebalance.target_op_fn = dht_setattr2;
+ if ((op_ret == -1) && !dht_inode_missing(op_errno)) {
+ gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
+ prev->name);
+ goto out;
+ }
- /* Phase 2 of migration */
- if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (postbuf)) {
- ret = dht_rebalance_complete_check (this, frame);
- if (!ret)
- return 0;
- }
+ if (local->call_cnt != 1)
+ goto out;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ local->rebalance.target_op_fn = dht_setattr2;
- /* At the end of the migration process, whatever 'attr' we
- have on source file will be migrated to destination file
- in one shot, hence we don't need to check for in progress
- state here (ie, PHASE1) */
+ /* Phase 2 of migration */
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2(postbuf)) {
+ dht_set_local_rebalance(this, local, NULL, prebuf, postbuf, xdata);
+
+ ret = dht_rebalance_complete_check(this, frame);
+ if (!ret)
+ return 0;
+ }
+
+ /* At the end of the migration process, whatever 'attr' we
+ have on source file will be migrated to destination file
+ in one shot, hence we don't need to check for in progress
+ state here (ie, PHASE1) */
out:
- DHT_STRIP_PHASE1_FLAGS (postbuf);
- DHT_STRIP_PHASE1_FLAGS (prebuf);
- DHT_STACK_UNWIND (setattr, frame, op_ret, op_errno,
- prebuf, postbuf, xdata);
+ DHT_STRIP_PHASE1_FLAGS(postbuf);
+ DHT_STRIP_PHASE1_FLAGS(prebuf);
- return 0;
+ DHT_STACK_UNWIND(setattr, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+
+ return 0;
}
-int
-dht_setattr2 (xlator_t *this, call_frame_t *frame, int op_ret)
+static int
+dht_setattr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
- dht_local_t *local = NULL;
- xlator_t *subvol = NULL;
- inode_t *inode = NULL;
+ dht_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
+
+ if (!frame || !frame->local)
+ goto out;
+
+ local = frame->local;
+ op_errno = local->op_errno;
+
+ if (we_are_not_migrating(ret)) {
+ /* This dht xlator is not migrating the file. Unwind and
+ * pass on the original mode bits so the higher DHT layer
+ * can handle this.
+ */
+ DHT_STACK_UNWIND(setattr, frame, local->op_ret, local->op_errno,
+ &local->rebalance.prebuf, &local->rebalance.postbuf,
+ local->rebalance.xdata);
+ return 0;
+ }
- local = frame->local;
+ if (subvol == NULL)
+ goto out;
- inode = (local->fd) ? local->fd->inode : local->loc.inode;
+ local->call_cnt = 2; /* This is the second attempt */
- dht_inode_ctx_get1 (this, inode, &subvol);
+ if (local->fop == GF_FOP_SETATTR) {
+ STACK_WIND_COOKIE(frame, dht_file_setattr_cbk, subvol, subvol,
+ subvol->fops->setattr, &local->loc,
+ &local->rebalance.stbuf, local->rebalance.flags,
+ local->xattr_req);
+ } else {
+ STACK_WIND_COOKIE(frame, dht_file_setattr_cbk, subvol, subvol,
+ subvol->fops->fsetattr, local->fd,
+ &local->rebalance.stbuf, local->rebalance.flags,
+ local->xattr_req);
+ }
- if (!subvol)
- subvol = local->cached_subvol;
+ return 0;
- local->call_cnt = 2; /* This is the second attempt */
+out:
+ DHT_STACK_UNWIND(setattr, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
- if (local->fop == GF_FOP_SETATTR) {
- STACK_WIND (frame, dht_file_setattr_cbk, subvol,
- subvol->fops->setattr, &local->loc,
- &local->rebalance.stbuf, local->rebalance.flags,
- NULL);
- } else {
- STACK_WIND (frame, dht_file_setattr_cbk, subvol,
- subvol->fops->fsetattr, local->fd,
- &local->rebalance.stbuf, local->rebalance.flags,
- NULL);
+/* Keep the existing code same for all the cases other than regular file */
+int
+dht_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *statpre, struct iatt *statpost,
+ dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ xlator_t *prev = NULL;
+
+ local = frame->local;
+ prev = cookie;
+
+ LOCK(&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ UNLOCK(&frame->lock);
+ gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
+ prev->name);
+ goto post_unlock;
}
- return 0;
+ dht_iatt_merge(this, &local->prebuf, statpre);
+ dht_iatt_merge(this, &local->stbuf, statpost);
+
+ local->op_ret = 0;
+ local->op_errno = 0;
+ }
+ UNLOCK(&frame->lock);
+post_unlock:
+ this_call_cnt = dht_frame_return(frame);
+ if (is_last_call(this_call_cnt)) {
+ if (local->op_ret == 0)
+ dht_inode_ctx_time_set(local->loc.inode, this, &local->stbuf);
+ DHT_STACK_UNWIND(setattr, frame, local->op_ret, local->op_errno,
+ &local->prebuf, &local->stbuf, xdata);
+ }
+
+ return 0;
}
-
/* Keep the existing code same for all the cases other than regular file */
int
-dht_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *statpre,
- struct iatt *statpost, dict_t *xdata)
+dht_non_mds_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
-
-
- local = frame->local;
- prev = cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_msg_debug (this->name, 0,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
- goto unlock;
- }
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ xlator_t *prev = NULL;
+
+ local = frame->local;
+ prev = cookie;
+
+ if (op_ret == -1) {
+ gf_msg(this->name, op_errno, 0, 0, "subvolume %s returned -1",
+ prev->name);
+ goto post_unlock;
+ }
+
+ LOCK(&frame->lock);
+ {
+ dht_iatt_merge(this, &local->prebuf, statpre);
+ dht_iatt_merge(this, &local->stbuf, statpost);
+
+ local->op_ret = 0;
+ local->op_errno = 0;
+ }
+ UNLOCK(&frame->lock);
+post_unlock:
+ this_call_cnt = dht_frame_return(frame);
+ if (is_last_call(this_call_cnt)) {
+ dht_inode_ctx_time_set(local->loc.inode, this, &local->stbuf);
+ DHT_STACK_UNWIND(setattr, frame, 0, 0, &local->prebuf, &local->stbuf,
+ xdata);
+ }
+
+ return 0;
+}
- dht_iatt_merge (this, &local->prebuf, statpre, prev->this);
- dht_iatt_merge (this, &local->stbuf, statpost, prev->this);
+int
+dht_mds_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
- local->op_ret = 0;
- }
-unlock:
- UNLOCK (&frame->lock);
-
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- if (local->op_ret == 0)
- dht_inode_ctx_time_set (local->loc.inode, this,
- &local->stbuf);
- DHT_STACK_UNWIND (setattr, frame, local->op_ret, local->op_errno,
- &local->prebuf, &local->stbuf, xdata);
- }
+{
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ xlator_t *prev = NULL;
+ xlator_t *mds_subvol = NULL;
+ struct iatt loc_stbuf = {
+ 0,
+ };
+ int i = 0;
+
+ local = frame->local;
+ prev = cookie;
+ conf = this->private;
+ mds_subvol = local->mds_subvol;
+
+ if (op_ret == -1) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
+ prev->name);
+ goto out;
+ }
+
+ local->op_ret = 0;
+ loc_stbuf = local->stbuf;
+ dht_iatt_merge(this, &local->prebuf, statpre);
+ dht_iatt_merge(this, &local->stbuf, statpost);
+
+ local->call_cnt = conf->subvolume_cnt - 1;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (mds_subvol == conf->subvolumes[i])
+ continue;
+ STACK_WIND_COOKIE(frame, dht_non_mds_setattr_cbk, conf->subvolumes[i],
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->setattr, &local->loc,
+ &loc_stbuf, local->valid, local->xattr_req);
+ }
+
+ return 0;
+out:
+ DHT_STACK_UNWIND(setattr, frame, local->op_ret, local->op_errno,
+ &local->prebuf, &local->stbuf, xdata);
- return 0;
+ return 0;
}
-
int
-dht_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid, dict_t *xdata)
+dht_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata)
{
- xlator_t *subvol = NULL;
- dht_layout_t *layout = NULL;
- dht_local_t *local = NULL;
- int op_errno = -1;
- int i = -1;
- int call_cnt = 0;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
- local = dht_local_init (frame, loc, NULL, GF_FOP_SETATTR);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ xlator_t *subvol = NULL;
+ xlator_t *mds_subvol = NULL;
+ dht_layout_t *layout = NULL;
+ dht_local_t *local = NULL;
+ int op_errno = -1;
+ int i = -1;
+ int ret = -1;
+ int call_cnt = 0;
+ dht_conf_t *conf = NULL;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+ VALIDATE_OR_GOTO(loc->inode, err);
+ VALIDATE_OR_GOTO(loc->path, err);
+
+ conf = this->private;
+ local = dht_local_init(frame, loc, NULL, GF_FOP_SETATTR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ layout = local->layout;
+ if (!layout) {
+ gf_msg_debug(this->name, 0, "no layout for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (!layout_is_sane(layout)) {
+ gf_msg_debug(this->name, 0, "layout is not sane for path=%s",
+ loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
+
+ if (IA_ISREG(loc->inode->ia_type)) {
+ /* in the regular file _cbk(), we need to check for
+ migration possibilities */
+ local->rebalance.stbuf = *stbuf;
+ local->rebalance.flags = valid;
+ local->call_cnt = 1;
+ subvol = local->cached_subvol;
- layout = local->layout;
- if (!layout) {
- gf_msg_debug (this->name, 0,
- "no layout for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ STACK_WIND_COOKIE(frame, dht_file_setattr_cbk, subvol, subvol,
+ subvol->fops->setattr, loc, stbuf, valid, xdata);
- if (!layout_is_sane (layout)) {
- gf_msg_debug (this->name, 0,
- "layout is not sane for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
+ return 0;
+ }
+
+ local->call_cnt = call_cnt = layout->cnt;
+
+ if (IA_ISDIR(loc->inode->ia_type) && !__is_root_gfid(loc->inode->gfid) &&
+ call_cnt != 1) {
+ ret = dht_inode_ctx_mdsvol_get(loc->inode, this, &mds_subvol);
+ if (ret || !mds_subvol) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ "Failed to get mds subvol for path %s", local->loc.path);
+ op_errno = EINVAL;
+ goto err;
}
- if (IA_ISREG (loc->inode->ia_type)) {
- /* in the regular file _cbk(), we need to check for
- migration possibilities */
- local->rebalance.stbuf = *stbuf;
- local->rebalance.flags = valid;
- local->call_cnt = 1;
- subvol = local->cached_subvol;
-
- STACK_WIND (frame, dht_file_setattr_cbk, subvol,
- subvol->fops->setattr,
- loc, stbuf, valid, xdata);
-
- return 0;
+ local->mds_subvol = mds_subvol;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvolumes[i] == mds_subvol) {
+ if (!conf->subvolume_status[i]) {
+ gf_msg(this->name, GF_LOG_WARNING, layout->list[i].err,
+ DHT_MSG_HASHED_SUBVOL_DOWN,
+ "MDS subvol is down for path "
+ " %s Unable to set attr ",
+ local->loc.path);
+ op_errno = ENOTCONN;
+ goto err;
+ }
+ }
}
+ local->valid = valid;
+ local->stbuf = *stbuf;
- local->call_cnt = call_cnt = layout->cnt;
-
+ STACK_WIND_COOKIE(frame, dht_mds_setattr_cbk, local->mds_subvol,
+ local->mds_subvol, local->mds_subvol->fops->setattr,
+ loc, stbuf, valid, xdata);
+ return 0;
+ } else {
for (i = 0; i < call_cnt; i++) {
- STACK_WIND (frame, dht_setattr_cbk,
- layout->list[i].xlator,
- layout->list[i].xlator->fops->setattr,
- loc, stbuf, valid, xdata);
+ STACK_WIND_COOKIE(frame, dht_setattr_cbk, layout->list[i].xlator,
+ layout->list[i].xlator,
+ layout->list[i].xlator->fops->setattr, loc, stbuf,
+ valid, xdata);
}
+ }
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(setattr, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
+ return 0;
}
-
int
-dht_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
- int32_t valid, dict_t *xdata)
+dht_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata)
{
- xlator_t *subvol = NULL;
- dht_layout_t *layout = NULL;
- dht_local_t *local = NULL;
- int op_errno = -1;
- int i = -1;
- int call_cnt = 0;
-
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
- local = dht_local_init (frame, NULL, fd, GF_FOP_FSETATTR);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- layout = local->layout;
- if (!layout) {
- gf_msg_debug (this->name, 0,
- "no layout for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
-
- if (!layout_is_sane (layout)) {
- gf_msg_debug (this->name, 0,
- "layout is not sane for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
-
- if (IA_ISREG (fd->inode->ia_type)) {
- /* in the regular file _cbk(), we need to check for
- migration possibilities */
- local->rebalance.stbuf = *stbuf;
- local->rebalance.flags = valid;
- local->call_cnt = 1;
- subvol = local->cached_subvol;
-
- STACK_WIND (frame, dht_file_setattr_cbk, subvol,
- subvol->fops->fsetattr,
- fd, stbuf, valid, xdata);
+ xlator_t *subvol = NULL;
+ dht_layout_t *layout = NULL;
+ dht_local_t *local = NULL;
+ int op_errno = -1;
+ int i = -1;
+ int call_cnt = 0;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
+
+ local = dht_local_init(frame, NULL, fd, GF_FOP_FSETATTR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ layout = local->layout;
+ if (!layout) {
+ gf_msg_debug(this->name, 0, "no layout for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (!layout_is_sane(layout)) {
+ gf_msg_debug(this->name, 0, "layout is not sane for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
+
+ if (IA_ISREG(fd->inode->ia_type)) {
+ /* in the regular file _cbk(), we need to check for
+ migration possibilities */
+ local->rebalance.stbuf = *stbuf;
+ local->rebalance.flags = valid;
+ local->call_cnt = 1;
+ subvol = local->cached_subvol;
- return 0;
- }
+ STACK_WIND_COOKIE(frame, dht_file_setattr_cbk, subvol, subvol,
+ subvol->fops->fsetattr, fd, &local->rebalance.stbuf,
+ local->rebalance.flags, local->xattr_req);
+ return 0;
+ }
- local->call_cnt = call_cnt = layout->cnt;
+ local->call_cnt = call_cnt = layout->cnt;
- for (i = 0; i < call_cnt; i++) {
- STACK_WIND (frame, dht_setattr_cbk,
- layout->list[i].xlator,
- layout->list[i].xlator->fops->fsetattr,
- fd, stbuf, valid, xdata);
- }
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND_COOKIE(frame, dht_setattr_cbk, layout->list[i].xlator,
+ layout->list[i].xlator,
+ layout->list[i].xlator->fops->fsetattr, fd, stbuf,
+ valid, xdata);
+ }
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (fsetattr, frame, -1, op_errno, NULL, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(fsetattr, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
+ return 0;
}
diff --git a/xlators/cluster/dht/src/dht-layout.c b/xlators/cluster/dht/src/dht-layout.c
index 2ed15c5e43c..fda904c92c9 100644
--- a/xlators/cluster/dht/src/dht-layout.c
+++ b/xlators/cluster/dht/src/dht-layout.c
@@ -8,834 +8,801 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-
-#include "glusterfs.h"
-#include "xlator.h"
#include "dht-common.h"
-#include "byte-order.h"
-#include "dht-messages.h"
+#include <glusterfs/byte-order.h>
#include "unittest/unittest.h"
+#define layout_base_size (sizeof(dht_layout_t))
-#define layout_base_size (sizeof (dht_layout_t))
-
-#define layout_entry_size (sizeof ((dht_layout_t *)NULL)->list[0])
+#define layout_entry_size (sizeof((dht_layout_t *)NULL)->list[0])
#define layout_size(cnt) (layout_base_size + (cnt * layout_entry_size))
dht_layout_t *
-dht_layout_new (xlator_t *this, int cnt)
+dht_layout_new(xlator_t *this, int cnt)
{
- dht_layout_t *layout = NULL;
- dht_conf_t *conf = NULL;
+ dht_layout_t *layout = NULL;
+ dht_conf_t *conf = NULL;
- REQUIRE(NULL != this);
- REQUIRE(cnt >= 0);
+ REQUIRE(NULL != this);
+ REQUIRE(cnt >= 0);
- conf = this->private;
+ conf = this->private;
- layout = GF_CALLOC (1, layout_size (cnt),
- gf_dht_mt_dht_layout_t);
- if (!layout) {
- goto out;
- }
+ layout = GF_CALLOC(1, layout_size(cnt), gf_dht_mt_dht_layout_t);
+ if (!layout) {
+ goto out;
+ }
- layout->type = DHT_HASH_TYPE_DM;
- layout->cnt = cnt;
+ layout->type = DHT_HASH_TYPE_DM;
+ layout->cnt = cnt;
- if (conf) {
- layout->spread_cnt = conf->dir_spread_cnt;
- layout->gen = conf->gen;
- }
+ if (conf) {
+ layout->spread_cnt = conf->dir_spread_cnt;
+ layout->gen = conf->gen;
+ }
- layout->ref = 1;
+ GF_ATOMIC_INIT(layout->ref, 1);
- ENSURE(NULL != layout);
- ENSURE(layout->type == DHT_HASH_TYPE_DM);
- ENSURE(layout->cnt == cnt);
- ENSURE(layout->ref == 1);
+ ENSURE(NULL != layout);
+ ENSURE(layout->type == DHT_HASH_TYPE_DM);
+ ENSURE(layout->cnt == cnt);
+ ENSURE(GF_ATOMIC_GET(layout->ref) == 1);
out:
- return layout;
+ return layout;
}
-
dht_layout_t *
-dht_layout_get (xlator_t *this, inode_t *inode)
+dht_layout_get(xlator_t *this, inode_t *inode)
{
- dht_conf_t *conf = NULL;
- dht_layout_t *layout = NULL;
- int ret = 0;
-
- conf = this->private;
- if (!conf)
- goto out;
-
- LOCK (&conf->layout_lock);
- {
- ret = dht_inode_ctx_layout_get (inode, this, &layout);
- if ((!ret) && layout) {
- layout->ref++;
- }
- }
- UNLOCK (&conf->layout_lock);
-
-out:
- return layout;
+ dht_layout_t *layout = NULL;
+ int ret = 0;
+
+ ret = dht_inode_ctx_layout_get(inode, this, &layout);
+ if ((!ret) && layout) {
+ GF_ATOMIC_INC(layout->ref);
+ }
+ return layout;
}
-
int
-dht_layout_set (xlator_t *this, inode_t *inode, dht_layout_t *layout)
+dht_layout_set(xlator_t *this, inode_t *inode, dht_layout_t *layout)
{
- dht_conf_t *conf = NULL;
- int oldret = -1;
- int ret = 0;
- dht_layout_t *old_layout;
-
- conf = this->private;
- if (!conf || !layout)
- goto out;
-
- LOCK (&conf->layout_lock);
- {
- oldret = dht_inode_ctx_layout_get (inode, this, &old_layout);
- if (layout)
- layout->ref++;
- dht_inode_ctx_layout_set (inode, this, layout);
- }
- UNLOCK (&conf->layout_lock);
-
- if (!oldret) {
- dht_layout_unref (this, old_layout);
- }
+ dht_conf_t *conf = NULL;
+ int oldret = -1;
+ int ret = -1;
+ dht_layout_t *old_layout;
+
+ conf = this->private;
+ if (!conf || !layout)
+ goto out;
+
+ LOCK(&conf->layout_lock);
+ {
+ oldret = dht_inode_ctx_layout_get(inode, this, &old_layout);
+ if (layout)
+ GF_ATOMIC_INC(layout->ref);
+ ret = dht_inode_ctx_layout_set(inode, this, layout);
+ }
+ UNLOCK(&conf->layout_lock);
+
+ if (!oldret) {
+ dht_layout_unref(this, old_layout);
+ }
+ if (ret)
+ GF_ATOMIC_DEC(layout->ref);
out:
- return ret;
+ return ret;
}
-
void
-dht_layout_unref (xlator_t *this, dht_layout_t *layout)
+dht_layout_unref(xlator_t *this, dht_layout_t *layout)
{
- dht_conf_t *conf = NULL;
- int ref = 0;
-
- if (!layout || layout->preset || !this->private)
- return;
+ int ref = 0;
- conf = this->private;
+ if (!layout || layout->preset || !this->private)
+ return;
- LOCK (&conf->layout_lock);
- {
- ref = --layout->ref;
- }
- UNLOCK (&conf->layout_lock);
+ ref = GF_ATOMIC_DEC(layout->ref);
- if (!ref)
- GF_FREE (layout);
+ if (!ref)
+ GF_FREE(layout);
}
-
dht_layout_t *
-dht_layout_ref (xlator_t *this, dht_layout_t *layout)
+dht_layout_ref(xlator_t *this, dht_layout_t *layout)
{
- dht_conf_t *conf = NULL;
-
- if (layout->preset || !this->private)
- return layout;
+ if (layout->preset || !this->private)
+ return layout;
- conf = this->private;
- LOCK (&conf->layout_lock);
- {
- layout->ref++;
- }
- UNLOCK (&conf->layout_lock);
+ GF_ATOMIC_INC(layout->ref);
- return layout;
+ return layout;
}
-
xlator_t *
-dht_layout_search (xlator_t *this, dht_layout_t *layout, const char *name)
+dht_layout_search(xlator_t *this, dht_layout_t *layout, const char *name)
{
- uint32_t hash = 0;
- xlator_t *subvol = NULL;
- int i = 0;
- int ret = 0;
-
- ret = dht_hash_compute (this, layout->type, name, &hash);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "hash computation failed for type=%d name=%s",
- layout->type, name);
- goto out;
- }
-
- for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].start <= hash
- && layout->list[i].stop >= hash) {
- subvol = layout->list[i].xlator;
- break;
- }
- }
-
- if (!subvol) {
- gf_log (this->name, GF_LOG_WARNING,
- "no subvolume for hash (value) = %u", hash);
- }
+ uint32_t hash = 0;
+ xlator_t *subvol = NULL;
+ int i = 0;
+ int ret = 0;
+
+ ret = dht_hash_compute(this, layout->type, name, &hash);
+ if (ret != 0) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_COMPUTE_HASH_FAILED,
+ "type=%d", layout->type, "name=%s", name, NULL);
+ goto out;
+ }
+
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].start <= hash && layout->list[i].stop >= hash) {
+ subvol = layout->list[i].xlator;
+ break;
+ }
+ }
+
+ if (!subvol) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ "hash-value=0x%x", hash, NULL);
+ }
out:
- return subvol;
+ return subvol;
}
-
dht_layout_t *
-dht_layout_for_subvol (xlator_t *this, xlator_t *subvol)
+dht_layout_for_subvol(xlator_t *this, xlator_t *subvol)
{
- dht_conf_t *conf = NULL;
- dht_layout_t *layout = NULL;
- int i = 0;
-
- conf = this->private;
- if (!conf)
- goto out;
-
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (conf->subvolumes[i] == subvol) {
- layout = conf->file_layouts[i];
- break;
- }
+ dht_conf_t *conf = NULL;
+ dht_layout_t *layout = NULL;
+ int i = 0;
+
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvolumes[i] == subvol) {
+ layout = conf->file_layouts[i];
+ break;
}
+ }
out:
- return layout;
+ return layout;
}
-
int
-dht_layouts_init (xlator_t *this, dht_conf_t *conf)
+dht_layouts_init(xlator_t *this, dht_conf_t *conf)
{
- dht_layout_t *layout = NULL;
- int i = 0;
- int ret = -1;
-
- if (!conf)
- goto out;
-
- conf->file_layouts = GF_CALLOC (conf->subvolume_cnt,
- sizeof (dht_layout_t *),
- gf_dht_mt_dht_layout_t);
- if (!conf->file_layouts) {
- goto out;
- }
-
- for (i = 0; i < conf->subvolume_cnt; i++) {
- layout = dht_layout_new (this, 1);
+ dht_layout_t *layout = NULL;
+ int i = 0;
+ int ret = -1;
- if (!layout) {
- goto out;
- }
+ if (!conf)
+ goto out;
- layout->preset = 1;
+ conf->file_layouts = GF_CALLOC(conf->subvolume_cnt, sizeof(dht_layout_t *),
+ gf_dht_mt_dht_layout_t);
+ if (!conf->file_layouts) {
+ goto out;
+ }
- layout->list[0].xlator = conf->subvolumes[i];
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ layout = dht_layout_new(this, 1);
- conf->file_layouts[i] = layout;
+ if (!layout) {
+ goto out;
}
- ret = 0;
+ layout->preset = 1;
+
+ layout->list[0].xlator = conf->subvolumes[i];
+
+ conf->file_layouts[i] = layout;
+ }
+
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
int
-dht_disk_layout_extract (xlator_t *this, dht_layout_t *layout,
- int pos, int32_t **disk_layout_p)
+dht_disk_layout_extract(xlator_t *this, dht_layout_t *layout, int pos,
+ int32_t **disk_layout_p)
{
- int ret = -1;
- int32_t *disk_layout = NULL;
+ int ret = -1;
+ int32_t *disk_layout = NULL;
- disk_layout = GF_CALLOC (5, sizeof (int),
- gf_dht_mt_int32_t);
- if (!disk_layout) {
- goto out;
- }
+ disk_layout = GF_CALLOC(5, sizeof(int), gf_dht_mt_int32_t);
+ if (!disk_layout) {
+ goto out;
+ }
- disk_layout[0] = hton32 (1);
- disk_layout[1] = hton32 (layout->type);
- disk_layout[2] = hton32 (layout->list[pos].start);
- disk_layout[3] = hton32 (layout->list[pos].stop);
+ disk_layout[0] = hton32(layout->list[pos].commit_hash);
+ disk_layout[1] = hton32(layout->type);
+ disk_layout[2] = hton32(layout->list[pos].start);
+ disk_layout[3] = hton32(layout->list[pos].stop);
- if (disk_layout_p)
- *disk_layout_p = disk_layout;
- else
- GF_FREE (disk_layout);
+ if (disk_layout_p)
+ *disk_layout_p = disk_layout;
+ else
+ GF_FREE(disk_layout);
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
int
-dht_disk_layout_merge (xlator_t *this, dht_layout_t *layout,
- int pos, void *disk_layout_raw, int disk_layout_len)
+dht_disk_layout_extract_for_subvol(xlator_t *this, dht_layout_t *layout,
+ xlator_t *subvol, int32_t **disk_layout_p)
{
- int cnt = 0;
- int type = 0;
- int start_off = 0;
- int stop_off = 0;
- int disk_layout[4];
-
- if (!disk_layout_raw) {
- gf_msg (this->name, GF_LOG_CRITICAL, 0,
- DHT_MSG_LAYOUT_MERGE_FAILED,
- "error no layout on disk for merge");
- return -1;
- }
-
- GF_ASSERT (disk_layout_len == sizeof (disk_layout));
-
- memcpy (disk_layout, disk_layout_raw, disk_layout_len);
-
- cnt = ntoh32 (disk_layout[0]);
- if (cnt != 1) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_INVALID_DISK_LAYOUT,
- "Invalid disk layout: Invalid count %d", cnt);
- return -1;
- }
+ int i = 0;
+
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].xlator == subvol)
+ break;
+ }
+
+ if (i == layout->cnt)
+ return -1;
+
+ return dht_disk_layout_extract(this, layout, i, disk_layout_p);
+}
+
+static int
+dht_disk_layout_merge(xlator_t *this, dht_layout_t *layout, int pos,
+ void *disk_layout_raw, int disk_layout_len)
+{
+ int type = 0;
+ int start_off = 0;
+ int stop_off = 0;
+ int commit_hash = 0;
+ int disk_layout[4];
+
+ if (!disk_layout_raw) {
+ gf_smsg(this->name, GF_LOG_CRITICAL, 0, DHT_MSG_LAYOUT_MERGE_FAILED,
+ NULL);
+ return -1;
+ }
- type = ntoh32 (disk_layout[1]);
- switch (type) {
+ GF_ASSERT(disk_layout_len == sizeof(disk_layout));
+
+ memcpy(disk_layout, disk_layout_raw, disk_layout_len);
+
+ type = ntoh32(disk_layout[1]);
+ switch (type) {
case DHT_HASH_TYPE_DM_USER:
- gf_msg_debug (this->name, 0, "found user-set layout");
- layout->type = type;
- /* Fall through. */
- case DHT_HASH_TYPE_DM:
- break;
+ gf_msg_debug(this->name, 0, "found user-set layout");
+ layout->type = type;
+ /* Fall through. */
+ case DHT_HASH_TYPE_DM:
+ break;
default:
- gf_msg (this->name, GF_LOG_CRITICAL, 0,
- DHT_MSG_INVALID_DISK_LAYOUT,
- "Invalid disk layout: "
- "Catastrophic error layout with unknown type found %d",
- disk_layout[1]);
- return -1;
- }
-
- start_off = ntoh32 (disk_layout[2]);
- stop_off = ntoh32 (disk_layout[3]);
-
- layout->list[pos].start = start_off;
- layout->list[pos].stop = stop_off;
-
- gf_msg_trace (this->name, 0,
- "merged to layout: %u - %u (type %d) from %s",
- start_off, stop_off, type,
- layout->list[pos].xlator->name);
-
- return 0;
-}
+ gf_smsg(this->name, GF_LOG_CRITICAL, 0, DHT_MSG_INVALID_DISK_LAYOUT,
+ "layout=%d", disk_layout[1], NULL);
+ return -1;
+ }
+
+ commit_hash = ntoh32(disk_layout[0]);
+ start_off = ntoh32(disk_layout[2]);
+ stop_off = ntoh32(disk_layout[3]);
+ layout->list[pos].commit_hash = commit_hash;
+ layout->list[pos].start = start_off;
+ layout->list[pos].stop = stop_off;
+
+ gf_msg_trace(this->name, 0,
+ "merged to layout: 0x%x - 0x%x (hash 0x%x, type %d) from %s",
+ start_off, stop_off, commit_hash, type,
+ layout->list[pos].xlator->name);
+
+ return 0;
+}
int
-dht_layout_merge (xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
- int op_ret, int op_errno, dict_t *xattr)
+dht_layout_merge(xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
+ int op_ret, int op_errno, dict_t *xattr)
{
- int i = 0;
- int ret = -1;
- int err = -1;
- void *disk_layout_raw = NULL;
- int disk_layout_len = 0;
- dht_conf_t *conf = this->private;
-
- if (op_ret != 0) {
- err = op_errno;
- }
+ int i = 0;
+ int ret = -1;
+ int err = -1;
+ void *disk_layout_raw = NULL;
+ int disk_layout_len = 0;
+ dht_conf_t *conf = this->private;
- for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].xlator == NULL) {
- layout->list[i].err = err;
- layout->list[i].xlator = subvol;
- break;
- }
- }
+ if (op_ret != 0) {
+ err = op_errno;
+ }
- if (op_ret != 0) {
- ret = 0;
- goto out;
- }
+ if (!layout)
+ goto out;
- if (xattr) {
- /* during lookup and not mkdir */
- ret = dict_get_ptr_and_len (xattr, conf->xattr_name,
- &disk_layout_raw, &disk_layout_len);
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].xlator == NULL) {
+ layout->list[i].err = err;
+ layout->list[i].xlator = subvol;
+ break;
}
+ }
- if (ret != 0) {
- layout->list[i].err = 0;
- gf_msg_trace (this->name, 0,
- "Missing disk layout on %s. err = %d",
- subvol->name, err);
- ret = 0;
- goto out;
- }
+ if (op_ret != 0) {
+ ret = 0;
+ goto out;
+ }
- ret = dht_disk_layout_merge (this, layout, i, disk_layout_raw,
- disk_layout_len);
- if (ret != 0) {
- gf_msg (this->name, GF_LOG_WARNING, 0,
- DHT_MSG_LAYOUT_MERGE_FAILED,
- "layout merge from subvolume %s failed",
- subvol->name);
- goto out;
- }
+ if (xattr) {
+ /* during lookup and not mkdir */
+ ret = dict_get_ptr_and_len(xattr, conf->xattr_name, &disk_layout_raw,
+ &disk_layout_len);
+ }
+
+ if (ret != 0) {
layout->list[i].err = 0;
+ gf_msg_trace(this->name, 0, "Missing disk layout on %s. err = %d",
+ subvol->name, err);
+ ret = 0;
+ goto out;
+ }
+
+ ret = dht_disk_layout_merge(this, layout, i, disk_layout_raw,
+ disk_layout_len);
+ if (ret != 0) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_MERGE_FAILED,
+ "subvolume=%s", subvol->name, NULL);
+ goto out;
+ }
+
+ if (layout->commit_hash == 0) {
+ layout->commit_hash = layout->list[i].commit_hash;
+ } else if (layout->commit_hash != layout->list[i].commit_hash) {
+ layout->commit_hash = DHT_LAYOUT_HASH_INVALID;
+ }
+
+ layout->list[i].err = 0;
out:
- return ret;
+ return ret;
}
-
void
-dht_layout_entry_swap (dht_layout_t *layout, int i, int j)
+dht_layout_entry_swap(dht_layout_t *layout, int i, int j)
{
- uint32_t start_swap = 0;
- uint32_t stop_swap = 0;
- xlator_t *xlator_swap = 0;
- int err_swap = 0;
-
- start_swap = layout->list[i].start;
- stop_swap = layout->list[i].stop;
- xlator_swap = layout->list[i].xlator;
- err_swap = layout->list[i].err;
-
- layout->list[i].start = layout->list[j].start;
- layout->list[i].stop = layout->list[j].stop;
- layout->list[i].xlator = layout->list[j].xlator;
- layout->list[i].err = layout->list[j].err;
-
- layout->list[j].start = start_swap;
- layout->list[j].stop = stop_swap;
- layout->list[j].xlator = xlator_swap;
- layout->list[j].err = err_swap;
+ uint32_t start_swap = 0;
+ uint32_t stop_swap = 0;
+ uint32_t commit_hash_swap = 0;
+ xlator_t *xlator_swap = 0;
+ int err_swap = 0;
+
+ start_swap = layout->list[i].start;
+ stop_swap = layout->list[i].stop;
+ xlator_swap = layout->list[i].xlator;
+ err_swap = layout->list[i].err;
+ commit_hash_swap = layout->list[i].commit_hash;
+
+ layout->list[i].start = layout->list[j].start;
+ layout->list[i].stop = layout->list[j].stop;
+ layout->list[i].xlator = layout->list[j].xlator;
+ layout->list[i].err = layout->list[j].err;
+ layout->list[i].commit_hash = layout->list[j].commit_hash;
+
+ layout->list[j].start = start_swap;
+ layout->list[j].stop = stop_swap;
+ layout->list[j].xlator = xlator_swap;
+ layout->list[j].err = err_swap;
+ layout->list[j].commit_hash = commit_hash_swap;
}
void
-dht_layout_range_swap (dht_layout_t *layout, int i, int j)
+dht_layout_range_swap(dht_layout_t *layout, int i, int j)
{
- uint32_t start_swap = 0;
- uint32_t stop_swap = 0;
+ uint32_t start_swap = 0;
+ uint32_t stop_swap = 0;
- start_swap = layout->list[i].start;
- stop_swap = layout->list[i].stop;
+ start_swap = layout->list[i].start;
+ stop_swap = layout->list[i].stop;
- layout->list[i].start = layout->list[j].start;
- layout->list[i].stop = layout->list[j].stop;
+ layout->list[i].start = layout->list[j].start;
+ layout->list[i].stop = layout->list[j].stop;
- layout->list[j].start = start_swap;
- layout->list[j].stop = stop_swap;
+ layout->list[j].start = start_swap;
+ layout->list[j].stop = stop_swap;
}
-
-int64_t
-dht_layout_entry_cmp_volname (dht_layout_t *layout, int i, int j)
+static int64_t
+dht_layout_entry_cmp_volname(dht_layout_t *layout, int i, int j)
{
- return (strcmp (layout->list[i].xlator->name,
- layout->list[j].xlator->name));
+ return (strcmp(layout->list[i].xlator->name, layout->list[j].xlator->name));
}
gf_boolean_t
-dht_is_subvol_in_layout (dht_layout_t *layout, xlator_t *xlator)
+dht_is_subvol_in_layout(dht_layout_t *layout, xlator_t *xlator)
{
- int i = 0;
-
- for (i = 0; i < layout->cnt; i++) {
- /* Check if xlator is already part of layout, and layout is
- * non-zero. */
- if (!strcmp (layout->list[i].xlator->name, xlator->name)) {
- if (layout->list[i].start != layout->list[i].stop)
- return _gf_true;
- break;
- }
- }
- return _gf_false;
+ int i = 0;
+
+ for (i = 0; i < layout->cnt; i++) {
+ /* Check if xlator is already part of layout, and layout is
+ * non-zero. */
+ if (!strcmp(layout->list[i].xlator->name, xlator->name)) {
+ if (layout->list[i].start != layout->list[i].stop)
+ return _gf_true;
+ break;
+ }
+ }
+ return _gf_false;
}
-int64_t
-dht_layout_entry_cmp (dht_layout_t *layout, int i, int j)
+static int64_t
+dht_layout_entry_cmp(dht_layout_t *layout, int i, int j)
{
- int64_t diff = 0;
+ int64_t diff = 0;
- /* swap zero'ed out layouts to front, if needed */
- if (!layout->list[j].start && !layout->list[j].stop) {
- diff = (int64_t) layout->list[i].stop
- - (int64_t) layout->list[j].stop;
- goto out;
- }
- diff = (int64_t) layout->list[i].start
- - (int64_t) layout->list[j].start;
+ /* swap zero'ed out layouts to front, if needed */
+ if (!layout->list[j].start && !layout->list[j].stop) {
+ diff = (int64_t)layout->list[i].stop - (int64_t)layout->list[j].stop;
+ goto out;
+ }
+ diff = (int64_t)layout->list[i].start - (int64_t)layout->list[j].start;
out:
- return diff;
+ return diff;
}
-
int
-dht_layout_sort (dht_layout_t *layout)
+dht_layout_sort(dht_layout_t *layout)
{
- int i = 0;
- int j = 0;
- int64_t ret = 0;
+ int i = 0;
+ int j = 0;
+ int64_t ret = 0;
- /* TODO: O(n^2) -- bad bad */
+ /* TODO: O(n^2) -- bad bad */
- for (i = 0; i < layout->cnt - 1; i++) {
- for (j = i + 1; j < layout->cnt; j++) {
- ret = dht_layout_entry_cmp (layout, i, j);
- if (ret > 0)
- dht_layout_entry_swap (layout, i, j);
- }
+ for (i = 0; i < layout->cnt - 1; i++) {
+ for (j = i + 1; j < layout->cnt; j++) {
+ ret = dht_layout_entry_cmp(layout, i, j);
+ if (ret > 0)
+ dht_layout_entry_swap(layout, i, j);
}
+ }
- return 0;
+ return 0;
}
-int
-dht_layout_sort_volname (dht_layout_t *layout)
+void
+dht_layout_sort_volname(dht_layout_t *layout)
{
- int i = 0;
- int j = 0;
- int64_t ret = 0;
+ int i = 0;
+ int j = 0;
+ int64_t ret = 0;
- /* TODO: O(n^2) -- bad bad */
+ /* TODO: O(n^2) -- bad bad */
- for (i = 0; i < layout->cnt - 1; i++) {
- for (j = i + 1; j < layout->cnt; j++) {
- ret = dht_layout_entry_cmp_volname (layout, i, j);
- if (ret > 0)
- dht_layout_entry_swap (layout, i, j);
- }
+ for (i = 0; i < layout->cnt - 1; i++) {
+ for (j = i + 1; j < layout->cnt; j++) {
+ ret = dht_layout_entry_cmp_volname(layout, i, j);
+ if (ret > 0)
+ dht_layout_entry_swap(layout, i, j);
}
-
- return 0;
+ }
}
-
-int
-dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout,
- uint32_t *holes_p, uint32_t *overlaps_p,
- uint32_t *missing_p, uint32_t *down_p, uint32_t *misc_p,
- uint32_t *no_space_p)
+void
+dht_layout_anomalies(xlator_t *this, loc_t *loc, dht_layout_t *layout,
+ uint32_t *holes_p, uint32_t *overlaps_p,
+ uint32_t *missing_p, uint32_t *down_p, uint32_t *misc_p,
+ uint32_t *no_space_p)
{
- uint32_t overlaps = 0;
- uint32_t missing = 0;
- uint32_t down = 0;
- uint32_t misc = 0;
- uint32_t hole_cnt = 0;
- uint32_t overlap_cnt = 0;
- int i = 0;
- int ret = 0;
- uint32_t prev_stop = 0;
- uint32_t last_stop = 0;
- char is_virgin = 1;
- uint32_t no_space = 0;
-
- /* This function scans through the layout spread of a directory to
- check if there are any anomalies. Prior to calling this function
- the layout entries should be sorted in the ascending order.
-
- If the layout entry has err != 0
- then increment the corresponding anomaly.
- else
- if (start of the current layout entry > stop + 1 of previous
- non erroneous layout entry)
- then it indicates a hole in the layout
- if (start of the current layout entry < stop + 1 of previous
- non erroneous layout entry)
- then it indicates an overlap in the layout
- */
- last_stop = layout->list[0].start - 1;
- prev_stop = last_stop;
-
- for (i = 0; i < layout->cnt; i++) {
- switch (layout->list[i].err) {
- case -1:
- case ENOENT:
- case ESTALE:
- missing++;
- continue;
- case ENOTCONN:
- down++;
- continue;
- case ENOSPC:
- no_space++;
- continue;
- case 0:
- /* if err == 0 and start == stop, then it is a non misc++;
- * participating subvolume(spread-cnt). Then, do not
- * check for anomalies. If start != stop, then treat it
- * as misc err */
- if (layout->list[i].start == layout->list[i].stop) {
- continue;
- }
- break;
- default:
- misc++;
- continue;
- }
-
- is_virgin = 0;
-
- if ((prev_stop + 1) < layout->list[i].start) {
- hole_cnt++;
+ uint32_t overlaps = 0;
+ uint32_t missing = 0;
+ uint32_t down = 0;
+ uint32_t misc = 0;
+ uint32_t hole_cnt = 0;
+ uint32_t overlap_cnt = 0;
+ int i = 0;
+ uint32_t prev_stop = 0;
+ uint32_t last_stop = 0;
+ char is_virgin = 1;
+ uint32_t no_space = 0;
+
+ /* This function scans through the layout spread of a directory to
+ check if there are any anomalies. Prior to calling this function
+ the layout entries should be sorted in the ascending order.
+
+ If the layout entry has err != 0
+ then increment the corresponding anomaly.
+ else
+ if (start of the current layout entry > stop + 1 of previous
+ non erroneous layout entry)
+ then it indicates a hole in the layout
+ if (start of the current layout entry < stop + 1 of previous
+ non erroneous layout entry)
+ then it indicates an overlap in the layout
+ */
+ last_stop = layout->list[0].start - 1;
+ prev_stop = last_stop;
+
+ for (i = 0; i < layout->cnt; i++) {
+ switch (layout->list[i].err) {
+ case -1:
+ case ENOENT:
+ case ESTALE:
+ missing++;
+ continue;
+ case ENOTCONN:
+ down++;
+ continue;
+ case ENOSPC:
+ no_space++;
+ continue;
+ case 0:
+ /* if err == 0 and start == stop, then it is a non misc++;
+ * participating subvolume(spread-cnt). Then, do not
+ * check for anomalies. If start != stop, then treat it
+ * as misc err */
+ if (layout->list[i].start == layout->list[i].stop) {
+ continue;
}
+ break;
+ default:
+ misc++;
+ continue;
+ }
- if ((prev_stop + 1) > layout->list[i].start) {
- overlap_cnt++;
- overlaps += ((prev_stop + 1) - layout->list[i].start);
- }
- prev_stop = layout->list[i].stop;
+ is_virgin = 0;
+
+ if ((prev_stop + 1) < layout->list[i].start) {
+ hole_cnt++;
}
- if ((last_stop - prev_stop) || is_virgin)
- hole_cnt++;
+ if ((prev_stop + 1) > layout->list[i].start) {
+ overlap_cnt++;
+ overlaps += ((prev_stop + 1) - layout->list[i].start);
+ }
+ prev_stop = layout->list[i].stop;
+ }
- if (holes_p)
- *holes_p = hole_cnt;
+ if ((last_stop - prev_stop) || is_virgin)
+ hole_cnt++;
- if (overlaps_p)
- *overlaps_p = overlap_cnt;
+ if (holes_p)
+ *holes_p = hole_cnt;
- if (missing_p)
- *missing_p = missing;
+ if (overlaps_p)
+ *overlaps_p = overlap_cnt;
- if (down_p)
- *down_p = down;
+ if (missing_p)
+ *missing_p = missing;
- if (misc_p)
- *misc_p = misc;
+ if (down_p)
+ *down_p = down;
- if (no_space_p)
- *no_space_p = no_space;
+ if (misc_p)
+ *misc_p = misc;
- return ret;
+ if (no_space_p)
+ *no_space_p = no_space;
}
-
int
-dht_layout_missing_dirs (dht_layout_t *layout)
+dht_layout_missing_dirs(dht_layout_t *layout)
{
- int i = 0, missing = 0;
+ int i = 0, missing = 0;
- if (layout == NULL)
- goto out;
+ if (layout == NULL)
+ goto out;
- for (i = 0; i < layout->cnt; i++) {
- if ((layout->list[i].err == ENOENT)
- || ((layout->list[i].err == -1)
- && (layout->list[i].start == 0)
- && (layout->list[i].stop == 0))) {
- missing++;
- }
+ for (i = 0; i < layout->cnt; i++) {
+ if ((layout->list[i].err == ENOENT) ||
+ ((layout->list[i].err == -1) && (layout->list[i].start == 0) &&
+ (layout->list[i].stop == 0))) {
+ missing++;
}
+ }
out:
- return missing;
+ return missing;
}
-
int
-dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout)
+dht_layout_normalize(xlator_t *this, loc_t *loc, dht_layout_t *layout)
{
- int ret = 0;
- uint32_t holes = 0;
- uint32_t overlaps = 0;
- uint32_t missing = 0;
- uint32_t down = 0;
- uint32_t misc = 0, missing_dirs = 0;
- char gfid[GF_UUID_BUF_SIZE] = {0};
-
- ret = dht_layout_sort (layout);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "sort failed?! how the ....");
- goto out;
- }
-
- gf_uuid_unparse(loc->gfid, gfid);
-
- ret = dht_layout_anomalies (this, loc, layout,
- &holes, &overlaps,
- &missing, &down, &misc, NULL);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "Error finding anomalies in %s, gfid = %s",
- loc->path, gfid);
- goto out;
- }
-
- if (holes || overlaps) {
- if (missing == layout->cnt) {
- gf_msg_debug (this->name, 0,
- "Directory %s looked up first time"
- " gfid = %s", loc->path, gfid);
- } else {
- gf_log (this->name, GF_LOG_INFO,
- "Found anomalies in %s (gfid = %s). "
- "Holes=%d overlaps=%d",
- loc->path, gfid, holes, overlaps );
- }
- ret = -1;
- }
-
- if (ret >= 0) {
- missing_dirs = dht_layout_missing_dirs (layout);
- /* TODO During DHT selfheal rewrite (almost) find a better place
- * to detect this - probably in dht_layout_anomalies()
- */
- if (missing_dirs > 0)
- ret += missing_dirs;
- }
+ int ret = 0;
+ uint32_t holes = 0;
+ uint32_t overlaps = 0;
+ uint32_t missing = 0;
+ uint32_t down = 0;
+ uint32_t misc = 0, missing_dirs = 0;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ ret = dht_layout_sort(layout);
+ if (ret == -1) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_SORT_FAILED,
+ NULL);
+ goto out;
+ }
+
+ gf_uuid_unparse(loc->gfid, gfid);
+
+ dht_layout_anomalies(this, loc, layout, &holes, &overlaps, &missing, &down,
+ &misc, NULL);
+
+ if (holes || overlaps) {
+ if (missing == layout->cnt) {
+ gf_msg_debug(this->name, 0,
+ "Directory %s looked up first time"
+ " gfid = %s",
+ loc->path, gfid);
+ } else {
+ gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_ANOMALIES_INFO,
+ "path=%s", loc->path, "gfid=%s", gfid, "holes=%d", holes,
+ "overlaps=%d", overlaps, NULL);
+ }
+ ret = -1;
+ }
+
+ if (ret >= 0) {
+ missing_dirs = dht_layout_missing_dirs(layout);
+ /* TODO During DHT selfheal rewrite (almost) find a better place
+ * to detect this - probably in dht_layout_anomalies()
+ */
+ if (missing_dirs > 0)
+ ret += missing_dirs;
+ }
out:
- return ret;
+ return ret;
}
int
-dht_dir_has_layout (dict_t *xattr, char *name)
+dht_dir_has_layout(dict_t *xattr, char *name)
{
+ void *disk_layout_raw = NULL;
- void *disk_layout_raw = NULL;
-
- return dict_get_ptr (xattr, name, &disk_layout_raw);
+ return dict_get_ptr(xattr, name, &disk_layout_raw);
}
int
-dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
- loc_t *loc, dict_t *xattr)
+dht_layout_dir_mismatch(xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
+ loc_t *loc, dict_t *xattr)
{
- int idx = 0;
- int pos = -1;
- int ret = 0;
- int err = 0;
- int dict_ret = 0;
- int32_t disk_layout[4];
- void *disk_layout_raw = NULL;
- int32_t count = -1;
- uint32_t start_off = -1;
- uint32_t stop_off = -1;
- dht_conf_t *conf = this->private;
- char gfid[GF_UUID_BUF_SIZE] = {0};
-
- if(loc && loc->inode)
- gf_uuid_unparse(loc->inode->gfid, gfid);
-
- for (idx = 0; idx < layout->cnt; idx++) {
- if (layout->list[idx].xlator == subvol) {
- pos = idx;
- break;
- }
- }
-
- if (pos == -1) {
- gf_msg_debug (this->name, 0,
- "%s - no layout info for subvolume %s",
- loc->path, subvol->name);
- ret = 1;
- goto out;
- }
+ int idx = 0;
+ int pos = -1;
+ int ret = 0;
+ int err = 0;
+ int dict_ret = 0;
+ int32_t disk_layout[4];
+ void *disk_layout_raw = NULL;
+ uint32_t start_off = -1;
+ uint32_t stop_off = -1;
+ uint32_t commit_hash = -1;
+ dht_conf_t *conf = this->private;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ if (loc && loc->inode)
+ gf_uuid_unparse(loc->inode->gfid, gfid);
+
+ for (idx = 0; idx < layout->cnt; idx++) {
+ if (layout->list[idx].xlator == subvol) {
+ pos = idx;
+ break;
+ }
+ }
+
+ if (pos == -1) {
+ if (loc) {
+ gf_msg_debug(this->name, 0, "%s - no layout info for subvolume %s",
+ loc ? loc->path : "path not found", subvol->name);
+ }
+ ret = 1;
+ goto out;
+ }
+
+ err = layout->list[pos].err;
+
+ if (!xattr) {
+ if (err == 0) {
+ if (loc) {
+ gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_XATTR_DICT_NULL,
+ "path=%s", loc->path, NULL);
+ } else {
+ gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_XATTR_DICT_NULL,
+ "path not found", NULL);
+ }
+ ret = -1;
+ }
+ goto out;
+ }
+
+ dict_ret = dict_get_ptr(xattr, conf->xattr_name, &disk_layout_raw);
+
+ if (dict_ret < 0) {
+ if (err == 0 && layout->list[pos].stop) {
+ if (loc) {
+ gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_DISK_LAYOUT_MISSING,
+ "path=%s", loc->path, "gfid=%s", gfid, NULL);
+ } else {
+ gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_DISK_LAYOUT_MISSING,
+ "path not found"
+ "gfid=%s",
+ gfid, NULL);
+ }
+ ret = -1;
+ }
+ goto out;
+ }
+
+ memcpy(disk_layout, disk_layout_raw, sizeof(disk_layout));
+
+ start_off = ntoh32(disk_layout[2]);
+ stop_off = ntoh32(disk_layout[3]);
+ commit_hash = ntoh32(disk_layout[0]);
+
+ if ((layout->list[pos].start != start_off) ||
+ (layout->list[pos].stop != stop_off) ||
+ (layout->list[pos].commit_hash != commit_hash)) {
+ gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_LAYOUT_INFO, "subvol=%s",
+ layout->list[pos].xlator->name, "inode-layout:start=0x%x",
+ layout->list[pos].start, "inode-layout:stop=0x%x",
+ layout->list[pos].stop, "layout-commit-hash=0x%x; ",
+ layout->list[pos].commit_hash, "disk-layout:start-off=0x%x",
+ start_off, "disk-layout:top-off=0x%x", stop_off,
+ "commit-hash=0x%x", commit_hash, NULL);
+ ret = 1;
+ } else {
+ ret = 0;
+ }
+out:
+ return ret;
+}
- err = layout->list[pos].err;
+int
+dht_layout_preset(xlator_t *this, xlator_t *subvol, inode_t *inode)
+{
+ dht_layout_t *layout = NULL;
+ int ret = -1;
+ dht_conf_t *conf = NULL;
- if (!xattr) {
- if (err == 0) {
- gf_log (this->name, GF_LOG_INFO,
- "%s: xattr dictionary is NULL",
- loc->path);
- ret = -1;
- }
- goto out;
- }
+ conf = this->private;
+ if (!conf)
+ goto out;
- dict_ret = dict_get_ptr (xattr, conf->xattr_name,
- &disk_layout_raw);
+ layout = dht_layout_for_subvol(this, subvol);
+ if (!layout) {
+ gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_NO_LAYOUT_INFO,
+ "subvolume=%s", subvol ? subvol->name : "<nil>", NULL);
+ ret = -1;
+ goto out;
+ }
- if (dict_ret < 0) {
- if (err == 0 && layout->list[pos].stop) {
- gf_log (this->name, GF_LOG_INFO,
- "%s: Disk layout missing, gfid = %s",
- loc->path, gfid);
- ret = -1;
- }
- goto out;
- }
+ gf_msg_debug(this->name, 0, "file = %s, subvol = %s",
+ uuid_utoa(inode->gfid), subvol ? subvol->name : "<nil>");
- memcpy (disk_layout, disk_layout_raw, sizeof (disk_layout));
+ LOCK(&conf->layout_lock);
+ {
+ dht_inode_ctx_layout_set(inode, this, layout);
+ }
- count = ntoh32 (disk_layout[0]);
- if (count != 1) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_INVALID_DISK_LAYOUT,
- "Invalid disk layout: invalid count %d,"
- "path = %s, gfid = %s ", count, loc->path, gfid);
- ret = -1;
- goto out;
- }
+ UNLOCK(&conf->layout_lock);
- start_off = ntoh32 (disk_layout[2]);
- stop_off = ntoh32 (disk_layout[3]);
-
- if ((layout->list[pos].start != start_off)
- || (layout->list[pos].stop != stop_off)) {
- gf_log (this->name, GF_LOG_INFO,
- "subvol: %s; inode layout - %"PRIu32" - %"PRIu32"; "
- "disk layout - %"PRIu32" - %"PRIu32,
- layout->list[pos].xlator->name,
- layout->list[pos].start, layout->list[pos].stop,
- start_off, stop_off);
- ret = 1;
- } else {
- ret = 0;
- }
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
int
-dht_layout_preset (xlator_t *this, xlator_t *subvol, inode_t *inode)
+dht_layout_index_for_subvol(dht_layout_t *layout, xlator_t *subvol)
{
- dht_layout_t *layout = NULL;
- int ret = -1;
- dht_conf_t *conf = NULL;
-
- conf = this->private;
- if (!conf)
- goto out;
-
- layout = dht_layout_for_subvol (this, subvol);
- if (!layout) {
- gf_log (this->name, GF_LOG_INFO,
- "no pre-set layout for subvolume %s",
- subvol ? subvol->name : "<nil>");
- ret = -1;
- goto out;
- }
+ int i = 0, ret = -1;
- LOCK (&conf->layout_lock);
- {
- dht_inode_ctx_layout_set (inode, this, layout);
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].xlator == subvol) {
+ ret = i;
+ break;
}
- UNLOCK (&conf->layout_lock);
+ }
- ret = 0;
-out:
- return ret;
+ return ret;
}
diff --git a/xlators/cluster/dht/src/dht-linkfile.c b/xlators/cluster/dht/src/dht-linkfile.c
index 14df3187097..89ec6cca56e 100644
--- a/xlators/cluster/dht/src/dht-linkfile.c
+++ b/xlators/cluster/dht/src/dht-linkfile.c
@@ -8,348 +8,321 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-
-#include "glusterfs.h"
-#include "xlator.h"
-#include "compat.h"
+#include <glusterfs/compat.h>
#include "dht-common.h"
-#include "dht-messages.h"
-int
-dht_linkfile_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- inode_t *inode, struct iatt *stbuf, dict_t *xattr,
- struct iatt *postparent)
+static int
+dht_linkfile_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent)
{
- char is_linkfile = 0;
- dht_conf_t *conf = NULL;
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- char gfid[GF_UUID_BUF_SIZE] = {0};
-
- local = frame->local;
- prev = cookie;
- conf = this->private;
-
- if (op_ret)
- goto out;
-
- gf_uuid_unparse(local->loc.gfid, gfid);
-
- is_linkfile = check_is_linkfile (inode, stbuf, xattr,
- conf->link_xattr_name);
- if (!is_linkfile)
- gf_log (this->name, GF_LOG_WARNING,
- "got non-linkfile %s:%s, gfid = %s",
- prev->this->name, local->loc.path, gfid);
+ char is_linkfile = 0;
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ local = frame->local;
+ prev = cookie;
+ conf = this->private;
+
+ if (op_ret)
+ goto out;
+
+ gf_uuid_unparse(local->loc.gfid, gfid);
+
+ is_linkfile = check_is_linkfile(inode, stbuf, xattr, conf->link_xattr_name);
+ if (!is_linkfile)
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_NOT_LINK_FILE_ERROR,
+ "name=%s", prev->name, "path=%s", local->loc.path, "gfid=%s",
+ gfid, NULL);
out:
- local->linkfile.linkfile_cbk (frame, cookie, this, op_ret, op_errno,
- inode, stbuf, postparent, postparent,
- xattr);
- return 0;
+ local->linkfile.linkfile_cbk(frame, cookie, this, op_ret, op_errno, inode,
+ stbuf, postparent, postparent, xattr);
+ return 0;
}
-#define is_equal(a, b) ((a) == (b))
-int
-dht_linkfile_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, inode_t *inode,
- struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+static int
+dht_linkfile_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- dht_local_t *local = NULL;
- xlator_t *subvol = NULL;
- call_frame_t *prev = NULL;
- dict_t *xattrs = NULL;
- dht_conf_t *conf = NULL;
- int ret = -1;
-
- local = frame->local;
-
- if (!op_ret)
- local->linked = _gf_true;
-
- FRAME_SU_UNDO (frame, dht_local_t);
-
- if (op_ret && (op_errno == EEXIST)) {
- conf = this->private;
- prev = cookie;
- subvol = prev->this;
- if (!subvol)
- goto out;
- xattrs = dict_new ();
- if (!xattrs)
- goto out;
- ret = dict_set_uint32 (xattrs, conf->link_xattr_name, 256);
- if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value. key : %s",
- conf->link_xattr_name);
- goto out;
- }
-
- STACK_WIND (frame, dht_linkfile_lookup_cbk, subvol,
- subvol->fops->lookup, &local->loc, xattrs);
- if (xattrs)
- dict_unref (xattrs);
- return 0;
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ dict_t *xattrs = NULL;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
+
+ local = frame->local;
+
+ if (!op_ret)
+ local->linked = _gf_true;
+
+ FRAME_SU_UNDO(frame, dht_local_t);
+
+ if (op_ret && (op_errno == EEXIST)) {
+ conf = this->private;
+ subvol = cookie;
+ if (!subvol)
+ goto out;
+ xattrs = dict_new();
+ if (!xattrs)
+ goto out;
+ ret = dict_set_uint32(xattrs, conf->link_xattr_name, 256);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "mame=%s", conf->link_xattr_name, NULL);
+ goto out;
}
-out:
- local->linkfile.linkfile_cbk (frame, cookie, this, op_ret, op_errno,
- inode, stbuf, preparent, postparent,
- xdata);
+
+ STACK_WIND_COOKIE(frame, dht_linkfile_lookup_cbk, subvol, subvol,
+ subvol->fops->lookup, &local->linkfile.loc, xattrs);
if (xattrs)
- dict_unref (xattrs);
+ dict_unref(xattrs);
return 0;
+ }
+out:
+ local->linkfile.linkfile_cbk(frame, cookie, this, op_ret, op_errno, inode,
+ stbuf, preparent, postparent, xdata);
+ if (xattrs)
+ dict_unref(xattrs);
+ return 0;
}
-
int
-dht_linkfile_create (call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk,
- xlator_t *this,
- xlator_t *tovol, xlator_t *fromvol, loc_t *loc)
+dht_linkfile_create(call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk,
+ xlator_t *this, xlator_t *tovol, xlator_t *fromvol,
+ loc_t *loc)
{
- dht_local_t *local = NULL;
- dict_t *dict = NULL;
- int need_unref = 0;
- int ret = 0;
- dht_conf_t *conf = this->private;
- char gfid[GF_UUID_BUF_SIZE] = {0};
-
- local = frame->local;
- local->linkfile.linkfile_cbk = linkfile_cbk;
- local->linkfile.srcvol = tovol;
-
- local->linked = _gf_false;
-
- dict = local->params;
- if (!dict) {
- dict = dict_new ();
- if (!dict)
- goto out;
- need_unref = 1;
- }
-
-
- if (!gf_uuid_is_null (local->gfid)) {
- gf_uuid_unparse(local->gfid, gfid);
-
- ret = dict_set_static_bin (dict, "gfid-req", local->gfid, 16);
- if (ret)
- gf_msg ("dht-linkfile", GF_LOG_INFO, 0,
- DHT_MSG_DICT_SET_FAILED,
- "%s: Failed to set dictionary value: "
- "key = gfid-req, gfid = %s ", loc->path, gfid);
- } else {
- gf_uuid_unparse(loc->gfid, gfid);
- }
-
- ret = dict_set_str (dict, GLUSTERFS_INTERNAL_FOP_KEY, "yes");
+ dht_local_t *local = NULL;
+ dict_t *dict = NULL;
+ int need_unref = 0;
+ int ret = 0;
+ dht_conf_t *conf = this->private;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ local = frame->local;
+ local->linkfile.linkfile_cbk = linkfile_cbk;
+ local->linkfile.srcvol = tovol;
+ loc_copy(&local->linkfile.loc, loc);
+
+ local->linked = _gf_false;
+
+ dict = local->params;
+ if (!dict) {
+ dict = dict_new();
+ if (!dict)
+ goto out;
+ need_unref = 1;
+ }
+
+ if (!gf_uuid_is_null(local->gfid)) {
+ gf_uuid_unparse(local->gfid, gfid);
+
+ ret = dict_set_gfuuid(dict, "gfid-req", local->gfid, true);
if (ret)
- gf_msg ("dht-linkfile", GF_LOG_INFO, 0,
- DHT_MSG_DICT_SET_FAILED,
- "%s: Failed to set dictionary value: key = %s,"
- " gfid = %s", loc->path,
- GLUSTERFS_INTERNAL_FOP_KEY, gfid);
-
- ret = dict_set_str (dict, conf->link_xattr_name, tovol->name);
-
- if (ret < 0) {
- gf_msg (frame->this->name, GF_LOG_INFO, 0,
- DHT_MSG_CREATE_LINK_FAILED,
- "%s: failed to initialize linkfile data, gfid = %s",
- loc->path, gfid);
- goto out;
- }
-
- local->link_subvol = fromvol;
- /* Always create as root:root. dht_linkfile_attr_heal fixes the
- * ownsership */
- FRAME_SU_DO (frame, dht_local_t);
- STACK_WIND (frame, dht_linkfile_create_cbk,
- fromvol, fromvol->fops->mknod, loc,
- S_IFREG | DHT_LINKFILE_MODE, 0, 0, dict);
-
- if (need_unref && dict)
- dict_unref (dict);
-
- return 0;
+ gf_smsg("dht-linkfile", GF_LOG_INFO, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", loc->path, "gfid=%s", gfid, NULL);
+ } else {
+ gf_uuid_unparse(loc->gfid, gfid);
+ }
+
+ ret = dict_set_str(dict, GLUSTERFS_INTERNAL_FOP_KEY, "yes");
+ if (ret)
+ gf_smsg("dht-linkfile", GF_LOG_INFO, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", loc->path, "key=%s", GLUSTERFS_INTERNAL_FOP_KEY,
+ "gfid=%s", gfid, NULL);
+
+ ret = dict_set_str(dict, conf->link_xattr_name, tovol->name);
+
+ if (ret < 0) {
+ gf_smsg(frame->this->name, GF_LOG_INFO, 0, DHT_MSG_CREATE_LINK_FAILED,
+ "path=%s", loc->path, "gfid=%s", gfid, NULL);
+ goto out;
+ }
+
+ local->link_subvol = fromvol;
+ /* Always create as root:root. dht_linkfile_attr_heal fixes the
+ * ownsership */
+ FRAME_SU_DO(frame, dht_local_t);
+ STACK_WIND_COOKIE(frame, dht_linkfile_create_cbk, fromvol, fromvol,
+ fromvol->fops->mknod, loc, S_IFREG | DHT_LINKFILE_MODE, 0,
+ 0, dict);
+
+ if (need_unref && dict)
+ dict_unref(dict);
+
+ return 0;
out:
- local->linkfile.linkfile_cbk (frame, NULL, frame->this, -1, ENOMEM,
- loc->inode, NULL, NULL, NULL, NULL);
+ local->linkfile.linkfile_cbk(frame, frame->this, frame->this, -1, ENOMEM,
+ loc->inode, NULL, NULL, NULL, NULL);
- if (need_unref && dict)
- dict_unref (dict);
+ if (need_unref && dict)
+ dict_unref(dict);
- return 0;
+ return 0;
}
-
int
-dht_linkfile_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
+dht_linkfile_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- xlator_t *subvol = NULL;
- char gfid[GF_UUID_BUF_SIZE] = {0};
-
- local = frame->local;
- prev = cookie;
- subvol = prev->this;
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+ local = frame->local;
+ subvol = cookie;
- if (op_ret == -1) {
-
- gf_uuid_unparse(local->loc.gfid, gfid);
- gf_msg (this->name, GF_LOG_INFO, op_errno,
- DHT_MSG_UNLINK_FAILED,
- "Unlinking linkfile %s (gfid = %s)on "
- "subvolume %s failed ",
- local->loc.path, gfid, subvol->name);
- }
+ if (op_ret == -1) {
+ gf_uuid_unparse(local->loc.gfid, gfid);
+ gf_smsg(this->name, GF_LOG_INFO, op_errno, DHT_MSG_UNLINK_FAILED,
+ "path=%s", local->loc.path, "gfid=%s", gfid, "subvolume=%s",
+ subvol->name, NULL);
+ }
- DHT_STACK_DESTROY (frame);
+ DHT_STACK_DESTROY(frame);
- return 0;
+ return 0;
}
-
int
-dht_linkfile_unlink (call_frame_t *frame, xlator_t *this,
- xlator_t *subvol, loc_t *loc)
+dht_linkfile_unlink(call_frame_t *frame, xlator_t *this, xlator_t *subvol,
+ loc_t *loc)
{
- call_frame_t *unlink_frame = NULL;
- dht_local_t *unlink_local = NULL;
+ call_frame_t *unlink_frame = NULL;
+ dht_local_t *unlink_local = NULL;
- unlink_frame = copy_frame (frame);
- if (!unlink_frame) {
- goto err;
- }
+ unlink_frame = copy_frame(frame);
+ if (!unlink_frame) {
+ goto err;
+ }
- /* Using non-fop value here, as anyways, 'local->fop' is not used in
- this particular case */
- unlink_local = dht_local_init (unlink_frame, loc, NULL,
- GF_FOP_MAXVALUE);
- if (!unlink_local) {
- goto err;
- }
+ /* Using non-fop value here, as anyways, 'local->fop' is not used in
+ this particular case */
+ unlink_local = dht_local_init(unlink_frame, loc, NULL, GF_FOP_MAXVALUE);
+ if (!unlink_local) {
+ goto err;
+ }
- STACK_WIND (unlink_frame, dht_linkfile_unlink_cbk,
- subvol, subvol->fops->unlink,
- &unlink_local->loc, 0, NULL);
+ STACK_WIND_COOKIE(unlink_frame, dht_linkfile_unlink_cbk, subvol, subvol,
+ subvol->fops->unlink, &unlink_local->loc, 0, NULL);
- return 0;
+ return 0;
err:
- if (unlink_frame)
- DHT_STACK_DESTROY (unlink_frame);
+ if (unlink_frame)
+ DHT_STACK_DESTROY(unlink_frame);
- return -1;
+ return -1;
}
-
xlator_t *
-dht_linkfile_subvol (xlator_t *this, inode_t *inode, struct iatt *stbuf,
- dict_t *xattr)
+dht_linkfile_subvol(xlator_t *this, inode_t *inode, struct iatt *stbuf,
+ dict_t *xattr)
{
- dht_conf_t *conf = NULL;
- xlator_t *subvol = NULL;
- void *volname = NULL;
- int i = 0, ret = 0;
+ dht_conf_t *conf = NULL;
+ xlator_t *subvol = NULL;
+ void *volname = NULL;
+ int i = 0, ret = 0;
- conf = this->private;
+ conf = this->private;
- if (!xattr)
- goto out;
+ if (!xattr)
+ goto out;
- ret = dict_get_ptr (xattr, conf->link_xattr_name, &volname);
+ ret = dict_get_ptr(xattr, conf->link_xattr_name, &volname);
- if ((-1 == ret) || !volname)
- goto out;
+ if ((-1 == ret) || !volname)
+ goto out;
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (strcmp (conf->subvolumes[i]->name, (char *)volname) == 0) {
- subvol = conf->subvolumes[i];
- break;
- }
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (strcmp(conf->subvolumes[i]->name, (char *)volname) == 0) {
+ subvol = conf->subvolumes[i];
+ break;
}
+ }
out:
- return subvol;
+ return subvol;
}
-int
-dht_linkfile_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *statpre,
- struct iatt *statpost, dict_t *xdata)
+static int
+dht_linkfile_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
{
- dht_local_t *local = NULL;
- loc_t *loc = NULL;
+ dht_local_t *local = NULL;
+ loc_t *loc = NULL;
- local = frame->local;
- loc = &local->loc;
+ local = frame->local;
+ loc = &local->loc;
- if (op_ret)
- gf_msg (this->name, GF_LOG_ERROR, op_errno,
- DHT_MSG_SETATTR_FAILED,
- "Failed to set attr uid/gid on %s"
- " :<gfid:%s> ",
- (loc->path? loc->path: "NULL"),
- uuid_utoa(local->gfid));
+ if (op_ret)
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno, DHT_MSG_SETATTR_FAILED,
+ "path=%s", (loc->path ? loc->path : "NULL"), "gfid=%s",
+ uuid_utoa(local->gfid), NULL);
- DHT_STACK_DESTROY (frame);
+ DHT_STACK_DESTROY(frame);
- return 0;
+ return 0;
}
int
-dht_linkfile_attr_heal (call_frame_t *frame, xlator_t *this)
+dht_linkfile_attr_heal(call_frame_t *frame, xlator_t *this)
{
- int ret = -1;
- call_frame_t *copy = NULL;
- dht_local_t *local = NULL;
- dht_local_t *copy_local = NULL;
- xlator_t *subvol = NULL;
- struct iatt stbuf = {0,};
+ int ret = -1;
+ call_frame_t *copy = NULL;
+ dht_local_t *local = NULL;
+ dht_local_t *copy_local = NULL;
+ xlator_t *subvol = NULL;
+ struct iatt stbuf = {
+ 0,
+ };
+ dict_t *xattr = NULL;
+
+ local = frame->local;
+
+ GF_VALIDATE_OR_GOTO("dht", local, out);
+ GF_VALIDATE_OR_GOTO("dht", local->link_subvol, out);
+
+ if (local->stbuf.ia_type == IA_INVAL)
+ return 0;
- local = frame->local;
+ DHT_MARK_FOP_INTERNAL(xattr);
- GF_VALIDATE_OR_GOTO ("dht", local, out);
- GF_VALIDATE_OR_GOTO ("dht", local->link_subvol, out);
+ gf_uuid_copy(local->loc.gfid, local->stbuf.ia_gfid);
- if (local->stbuf.ia_type == IA_INVAL)
- return 0;
+ copy = copy_frame(frame);
- gf_uuid_copy (local->loc.gfid, local->stbuf.ia_gfid);
+ if (!copy)
+ goto out;
- copy = copy_frame (frame);
+ copy_local = dht_local_init(copy, &local->loc, NULL, 0);
- if (!copy)
- goto out;
+ if (!copy_local)
+ goto out;
- copy_local = dht_local_init (copy, &local->loc, NULL, 0);
+ stbuf = local->stbuf;
+ subvol = local->link_subvol;
- if (!copy_local)
- goto out;
+ copy->local = copy_local;
- stbuf = local->stbuf;
- subvol = local->link_subvol;
+ FRAME_SU_DO(copy, dht_local_t);
- copy->local = copy_local;
+ STACK_WIND(copy, dht_linkfile_setattr_cbk, subvol, subvol->fops->setattr,
+ &copy_local->loc, &stbuf, (GF_SET_ATTR_UID | GF_SET_ATTR_GID),
+ xattr);
+ ret = 0;
+out:
+ if ((ret < 0) && (copy))
+ DHT_STACK_DESTROY(copy);
- FRAME_SU_DO (copy, dht_local_t);
+ if (xattr)
+ dict_unref(xattr);
- STACK_WIND (copy, dht_linkfile_setattr_cbk, subvol,
- subvol->fops->setattr, &copy_local->loc,
- &stbuf, (GF_SET_ATTR_UID | GF_SET_ATTR_GID), NULL);
- ret = 0;
-out:
- return ret;
+ return ret;
}
diff --git a/xlators/cluster/dht/src/dht-lock.c b/xlators/cluster/dht/src/dht-lock.c
new file mode 100644
index 00000000000..638821ccee5
--- /dev/null
+++ b/xlators/cluster/dht/src/dht-lock.c
@@ -0,0 +1,1392 @@
+/*
+ Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "dht-lock.h"
+
+static char *
+dht_lock_asprintf(dht_lock_t *lock)
+{
+ char *lk_buf = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {
+ 0,
+ };
+
+ if (lock == NULL)
+ goto out;
+
+ uuid_utoa_r(lock->loc.gfid, gfid);
+
+ gf_asprintf(&lk_buf, "%s:%s", lock->xl->name, gfid);
+
+out:
+ return lk_buf;
+}
+
+static void
+dht_log_lk_array(char *name, gf_loglevel_t log_level, dht_lock_t **lk_array,
+ int count)
+{
+ int i = 0;
+ char *lk_buf = NULL;
+
+ if ((lk_array == NULL) || (count == 0))
+ goto out;
+
+ for (i = 0; i < count; i++) {
+ lk_buf = dht_lock_asprintf(lk_array[i]);
+ if (!lk_buf)
+ goto out;
+
+ gf_smsg(name, log_level, 0, DHT_MSG_LK_ARRAY_INFO, "index=%d", i,
+ "lk_buf=%s", lk_buf, NULL);
+ GF_FREE(lk_buf);
+ }
+
+out:
+ return;
+}
+
+static void
+dht_lock_stack_destroy(call_frame_t *lock_frame, dht_lock_type_t lk)
+{
+ dht_local_t *local = NULL;
+
+ local = lock_frame->local;
+
+ if (lk == DHT_INODELK) {
+ local->lock[0].layout.my_layout.locks = NULL;
+ local->lock[0].layout.my_layout.lk_count = 0;
+ } else {
+ local->lock[0].ns.directory_ns.locks = NULL;
+ local->lock[0].ns.directory_ns.lk_count = 0;
+ }
+
+ DHT_STACK_DESTROY(lock_frame);
+ return;
+}
+
+static void
+dht_lock_free(dht_lock_t *lock)
+{
+ if (lock == NULL)
+ goto out;
+
+ loc_wipe(&lock->loc);
+ GF_FREE(lock->domain);
+ GF_FREE(lock->basename);
+ mem_put(lock);
+
+out:
+ return;
+}
+
+static void
+dht_set_lkowner(dht_lock_t **lk_array, int count, gf_lkowner_t *lkowner)
+{
+ int i = 0;
+
+ if (!lk_array || !lkowner)
+ goto out;
+
+ for (i = 0; i < count; i++) {
+ lk_array[i]->lk_owner = *lkowner;
+ }
+
+out:
+ return;
+}
+
+static int
+dht_lock_request_cmp(const void *val1, const void *val2)
+{
+ dht_lock_t *lock1 = NULL;
+ dht_lock_t *lock2 = NULL;
+ int ret = -1;
+
+ lock1 = *(dht_lock_t **)val1;
+ lock2 = *(dht_lock_t **)val2;
+
+ GF_VALIDATE_OR_GOTO("dht-locks", lock1, out);
+ GF_VALIDATE_OR_GOTO("dht-locks", lock2, out);
+
+ ret = strcmp(lock1->xl->name, lock2->xl->name);
+
+ if (ret == 0) {
+ ret = gf_uuid_compare(lock1->loc.gfid, lock2->loc.gfid);
+ }
+
+out:
+ return ret;
+}
+
+static int
+dht_lock_order_requests(dht_lock_t **locks, int count)
+{
+ int ret = -1;
+
+ if (!locks || !count)
+ goto out;
+
+ qsort(locks, count, sizeof(*locks), dht_lock_request_cmp);
+ ret = 0;
+
+out:
+ return ret;
+}
+
+void
+dht_lock_array_free(dht_lock_t **lk_array, int count)
+{
+ int i = 0;
+ dht_lock_t *lock = NULL;
+
+ if (lk_array == NULL)
+ goto out;
+
+ for (i = 0; i < count; i++) {
+ lock = lk_array[i];
+ lk_array[i] = NULL;
+ dht_lock_free(lock);
+ }
+
+out:
+ return;
+}
+
+int32_t
+dht_lock_count(dht_lock_t **lk_array, int lk_count)
+{
+ int i = 0, locked = 0;
+
+ if ((lk_array == NULL) || (lk_count == 0))
+ goto out;
+
+ for (i = 0; i < lk_count; i++) {
+ if (lk_array[i]->locked)
+ locked++;
+ }
+out:
+ return locked;
+}
+
+static call_frame_t *
+dht_lock_frame(call_frame_t *parent_frame)
+{
+ call_frame_t *lock_frame = NULL;
+
+ lock_frame = copy_frame(parent_frame);
+ if (lock_frame == NULL)
+ goto out;
+
+ set_lk_owner_from_ptr(&lock_frame->root->lk_owner, parent_frame->root);
+
+out:
+ return lock_frame;
+}
+
+dht_lock_t *
+dht_lock_new(xlator_t *this, xlator_t *xl, loc_t *loc, short type,
+ const char *domain, const char *basename,
+ dht_reaction_type_t do_on_failure)
+{
+ dht_conf_t *conf = NULL;
+ dht_lock_t *lock = NULL;
+
+ conf = this->private;
+
+ lock = mem_get0(conf->lock_pool);
+ if (lock == NULL)
+ goto out;
+
+ lock->xl = xl;
+ lock->type = type;
+ lock->do_on_failure = do_on_failure;
+
+ lock->domain = gf_strdup(domain);
+ if (lock->domain == NULL) {
+ dht_lock_free(lock);
+ lock = NULL;
+ goto out;
+ }
+
+ if (basename) {
+ lock->basename = gf_strdup(basename);
+ if (lock->basename == NULL) {
+ dht_lock_free(lock);
+ lock = NULL;
+ goto out;
+ }
+ }
+
+ /* Fill only inode and gfid.
+ posix and protocol/server give preference to pargfid/basename over
+ gfid/inode for resolution if all the three parameters of loc_t are
+ present. I want to avoid the following hypothetical situation:
+
+ 1. rebalance did a lookup on a dentry and got a gfid.
+ 2. rebalance acquires lock on loc_t which was filled with gfid and
+ path (pargfid/bname) from step 1.
+ 3. somebody deleted and recreated the same file
+ 4. rename on the same path acquires lock on loc_t which now points
+ to a different inode (and hence gets the lock).
+ 5. rebalance continues to migrate file (note that not all fops done
+ by rebalance during migration are inode/gfid based Eg., unlink)
+ 6. rename continues.
+ */
+ lock->loc.inode = inode_ref(loc->inode);
+ loc_gfid(loc, lock->loc.gfid);
+
+out:
+ return lock;
+}
+
+static int
+dht_local_entrylk_init(call_frame_t *frame, dht_lock_t **lk_array, int lk_count,
+ fop_entrylk_cbk_t entrylk_cbk)
+{
+ int ret = -1;
+ dht_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (local == NULL) {
+ local = dht_local_init(frame, NULL, NULL, 0);
+ }
+
+ if (local == NULL) {
+ goto out;
+ }
+
+ local->lock[0].ns.directory_ns.entrylk_cbk = entrylk_cbk;
+ local->lock[0].ns.directory_ns.locks = lk_array;
+ local->lock[0].ns.directory_ns.lk_count = lk_count;
+
+ ret = dht_lock_order_requests(local->lock[0].ns.directory_ns.locks,
+ local->lock[0].ns.directory_ns.lk_count);
+ if (ret < 0)
+ goto out;
+
+ ret = 0;
+out:
+ return ret;
+}
+
+static void
+dht_entrylk_done(call_frame_t *lock_frame)
+{
+ fop_entrylk_cbk_t entrylk_cbk = NULL;
+ call_frame_t *main_frame = NULL;
+ dht_local_t *local = NULL;
+
+ local = lock_frame->local;
+ main_frame = local->main_frame;
+
+ local->lock[0].ns.directory_ns.locks = NULL;
+ local->lock[0].ns.directory_ns.lk_count = 0;
+
+ entrylk_cbk = local->lock[0].ns.directory_ns.entrylk_cbk;
+ local->lock[0].ns.directory_ns.entrylk_cbk = NULL;
+
+ entrylk_cbk(main_frame, NULL, main_frame->this,
+ local->lock[0].ns.directory_ns.op_ret,
+ local->lock[0].ns.directory_ns.op_errno, NULL);
+
+ dht_lock_stack_destroy(lock_frame, DHT_ENTRYLK);
+ return;
+}
+
+static int32_t
+dht_unlock_entrylk_done(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ local = frame->local;
+ gf_uuid_unparse(local->lock[0].ns.directory_ns.locks[0]->loc.inode->gfid,
+ gfid);
+
+ if (op_ret < 0) {
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno,
+ DHT_MSG_UNLOCK_GFID_FAILED, "gfid=%s", gfid,
+ "DHT_LAYOUT_HEAL_DOMAIN", NULL);
+ }
+
+ DHT_STACK_DESTROY(frame);
+ return 0;
+}
+
+static int32_t
+dht_unlock_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int lk_index = 0, call_cnt = 0;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ lk_index = (long)cookie;
+
+ local = frame->local;
+
+ uuid_utoa_r(local->lock[0].ns.directory_ns.locks[lk_index]->loc.gfid, gfid);
+
+ if (op_ret < 0) {
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_UNLOCKING_FAILED,
+ "name=%s",
+ local->lock[0].ns.directory_ns.locks[lk_index]->xl->name,
+ "gfid=%s", gfid, NULL);
+ } else {
+ local->lock[0].ns.directory_ns.locks[lk_index]->locked = 0;
+ }
+
+ call_cnt = dht_frame_return(frame);
+ if (is_last_call(call_cnt)) {
+ dht_entrylk_done(frame);
+ }
+
+ return 0;
+}
+
+static int32_t
+dht_unlock_entrylk(call_frame_t *frame, dht_lock_t **lk_array, int lk_count,
+ fop_entrylk_cbk_t entrylk_cbk)
+{
+ dht_local_t *local = NULL;
+ int ret = -1, i = 0;
+ call_frame_t *lock_frame = NULL;
+ int call_cnt = 0;
+
+ GF_VALIDATE_OR_GOTO("dht-locks", frame, done);
+ GF_VALIDATE_OR_GOTO(frame->this->name, lk_array, done);
+ GF_VALIDATE_OR_GOTO(frame->this->name, entrylk_cbk, done);
+
+ call_cnt = dht_lock_count(lk_array, lk_count);
+ if (call_cnt == 0) {
+ ret = 0;
+ goto done;
+ }
+
+ lock_frame = dht_lock_frame(frame);
+ if (lock_frame == NULL) {
+ gf_smsg(frame->this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_ALLOC_FRAME_FAILED_NOT_UNLOCKING_FOLLOWING_ENTRYLKS,
+ NULL);
+
+ dht_log_lk_array(frame->this->name, GF_LOG_WARNING, lk_array, lk_count);
+ goto done;
+ }
+
+ ret = dht_local_entrylk_init(lock_frame, lk_array, lk_count, entrylk_cbk);
+ if (ret < 0) {
+ gf_smsg(frame->this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_LOCAL_LOCKS_STORE_FAILED_UNLOCKING_FOLLOWING_ENTRYLK,
+ NULL);
+
+ dht_log_lk_array(frame->this->name, GF_LOG_WARNING, lk_array, lk_count);
+
+ goto done;
+ }
+
+ local = lock_frame->local;
+ local->main_frame = frame;
+ local->call_cnt = call_cnt;
+
+ for (i = 0; i < local->lock[0].ns.directory_ns.lk_count; i++) {
+ if (!local->lock[0].ns.directory_ns.locks[i]->locked)
+ continue;
+
+ lock_frame->root
+ ->lk_owner = local->lock[0].ns.directory_ns.locks[i]->lk_owner;
+ STACK_WIND_COOKIE(
+ lock_frame, dht_unlock_entrylk_cbk, (void *)(long)i,
+ local->lock[0].ns.directory_ns.locks[i]->xl,
+ local->lock[0].ns.directory_ns.locks[i]->xl->fops->entrylk,
+ local->lock[0].ns.directory_ns.locks[i]->domain,
+ &local->lock[0].ns.directory_ns.locks[i]->loc,
+ local->lock[0].ns.directory_ns.locks[i]->basename, ENTRYLK_UNLOCK,
+ ENTRYLK_WRLCK, NULL);
+ if (!--call_cnt)
+ break;
+ }
+
+ return 0;
+
+done:
+ if (lock_frame)
+ dht_lock_stack_destroy(lock_frame, DHT_ENTRYLK);
+
+ /* no locks acquired, invoke entrylk_cbk */
+ if (ret == 0)
+ entrylk_cbk(frame, NULL, frame->this, 0, 0, NULL);
+
+ return ret;
+}
+
+int32_t
+dht_unlock_entrylk_wrapper(call_frame_t *frame, dht_elock_wrap_t *entrylk)
+{
+ dht_local_t *local = NULL, *lock_local = NULL;
+ call_frame_t *lock_frame = NULL;
+ char pgfid[GF_UUID_BUF_SIZE] = {0};
+ int ret = 0;
+
+ local = frame->local;
+
+ if (!entrylk || !entrylk->locks)
+ goto out;
+
+ gf_uuid_unparse(local->loc.parent->gfid, pgfid);
+
+ lock_frame = copy_frame(frame);
+ if (lock_frame == NULL) {
+ gf_smsg(frame->this->name, GF_LOG_WARNING, ENOMEM,
+ DHT_MSG_COPY_FRAME_FAILED, "pgfid=%s", pgfid, "name=%s",
+ local->loc.name, "path=%s", local->loc.path, NULL);
+ goto done;
+ }
+
+ lock_local = dht_local_init(lock_frame, NULL, NULL, 0);
+ if (lock_local == NULL) {
+ gf_smsg(frame->this->name, GF_LOG_WARNING, ENOMEM,
+ DHT_MSG_CREATE_FAILED, "local", "pgfid=%s", pgfid, "name=%s",
+ local->loc.name, "path=%s", local->loc.path, NULL);
+ goto done;
+ }
+
+ lock_frame->local = lock_local;
+
+ lock_local->lock[0].ns.directory_ns.locks = entrylk->locks;
+ lock_local->lock[0].ns.directory_ns.lk_count = entrylk->lk_count;
+ entrylk->locks = NULL;
+ entrylk->lk_count = 0;
+
+ ret = dht_unlock_entrylk(
+ lock_frame, lock_local->lock[0].ns.directory_ns.locks,
+ lock_local->lock[0].ns.directory_ns.lk_count, dht_unlock_entrylk_done);
+ if (ret)
+ goto done;
+
+ lock_frame = NULL;
+
+done:
+ if (lock_frame != NULL) {
+ DHT_STACK_DESTROY(lock_frame);
+ }
+
+out:
+ return 0;
+}
+
+static int
+dht_entrylk_cleanup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ dht_entrylk_done(frame);
+ return 0;
+}
+
+static void
+dht_entrylk_cleanup(call_frame_t *lock_frame)
+{
+ dht_lock_t **lk_array = NULL;
+ int lk_count = 0, lk_acquired = 0;
+ dht_local_t *local = NULL;
+
+ local = lock_frame->local;
+
+ lk_array = local->lock[0].ns.directory_ns.locks;
+ lk_count = local->lock[0].ns.directory_ns.lk_count;
+
+ lk_acquired = dht_lock_count(lk_array, lk_count);
+ if (lk_acquired != 0) {
+ dht_unlock_entrylk(lock_frame, lk_array, lk_count,
+ dht_entrylk_cleanup_cbk);
+ } else {
+ dht_entrylk_done(lock_frame);
+ }
+
+ return;
+}
+
+static int32_t
+dht_blocking_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ int lk_index = 0;
+ int i = 0;
+ dht_local_t *local = NULL;
+
+ lk_index = (long)cookie;
+
+ local = frame->local;
+ if (op_ret == 0) {
+ local->lock[0].ns.directory_ns.locks[lk_index]->locked = _gf_true;
+ } else {
+ switch (op_errno) {
+ case ESTALE:
+ case ENOENT:
+ if (local->lock[0]
+ .ns.directory_ns.locks[lk_index]
+ ->do_on_failure != IGNORE_ENOENT_ESTALE) {
+ local->lock[0].ns.directory_ns.op_ret = -1;
+ local->lock[0].ns.directory_ns.op_errno = op_errno;
+ goto cleanup;
+ }
+ break;
+ default:
+ local->lock[0].ns.directory_ns.op_ret = -1;
+ local->lock[0].ns.directory_ns.op_errno = op_errno;
+ goto cleanup;
+ }
+ }
+
+ if (lk_index == (local->lock[0].ns.directory_ns.lk_count - 1)) {
+ for (i = 0; (i < local->lock[0].ns.directory_ns.lk_count) &&
+ (!local->lock[0].ns.directory_ns.locks[i]->locked);
+ i++)
+ ;
+
+ if (i == local->lock[0].ns.directory_ns.lk_count) {
+ local->lock[0].ns.directory_ns.op_ret = -1;
+ local->lock[0].ns.directory_ns.op_errno = op_errno;
+ }
+
+ dht_entrylk_done(frame);
+ } else {
+ dht_blocking_entrylk_rec(frame, ++lk_index);
+ }
+
+ return 0;
+
+cleanup:
+ dht_entrylk_cleanup(frame);
+
+ return 0;
+}
+
+void
+dht_blocking_entrylk_rec(call_frame_t *frame, int i)
+{
+ dht_local_t *local = NULL;
+
+ local = frame->local;
+
+ STACK_WIND_COOKIE(
+ frame, dht_blocking_entrylk_cbk, (void *)(long)i,
+ local->lock[0].ns.directory_ns.locks[i]->xl,
+ local->lock[0].ns.directory_ns.locks[i]->xl->fops->entrylk,
+ local->lock[0].ns.directory_ns.locks[i]->domain,
+ &local->lock[0].ns.directory_ns.locks[i]->loc,
+ local->lock[0].ns.directory_ns.locks[i]->basename, ENTRYLK_LOCK,
+ ENTRYLK_WRLCK, NULL);
+
+ return;
+}
+
+int
+dht_blocking_entrylk(call_frame_t *frame, dht_lock_t **lk_array, int lk_count,
+ fop_entrylk_cbk_t entrylk_cbk)
+{
+ int ret = -1;
+ call_frame_t *lock_frame = NULL;
+ dht_local_t *local = NULL;
+
+ GF_VALIDATE_OR_GOTO("dht-locks", frame, out);
+ GF_VALIDATE_OR_GOTO(frame->this->name, lk_array, out);
+ GF_VALIDATE_OR_GOTO(frame->this->name, entrylk_cbk, out);
+
+ lock_frame = dht_lock_frame(frame);
+ if (lock_frame == NULL)
+ goto out;
+
+ ret = dht_local_entrylk_init(lock_frame, lk_array, lk_count, entrylk_cbk);
+ if (ret < 0) {
+ goto out;
+ }
+
+ dht_set_lkowner(lk_array, lk_count, &lock_frame->root->lk_owner);
+
+ local = lock_frame->local;
+ local->main_frame = frame;
+
+ dht_blocking_entrylk_rec(lock_frame, 0);
+
+ return 0;
+out:
+ if (lock_frame)
+ dht_lock_stack_destroy(lock_frame, DHT_ENTRYLK);
+
+ return -1;
+}
+
+static int
+dht_local_inodelk_init(call_frame_t *frame, dht_lock_t **lk_array, int lk_count,
+ fop_inodelk_cbk_t inodelk_cbk)
+{
+ int ret = -1;
+ dht_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (local == NULL) {
+ local = dht_local_init(frame, NULL, NULL, 0);
+ }
+
+ if (local == NULL) {
+ goto out;
+ }
+
+ local->lock[0].layout.my_layout.inodelk_cbk = inodelk_cbk;
+ local->lock[0].layout.my_layout.locks = lk_array;
+ local->lock[0].layout.my_layout.lk_count = lk_count;
+
+ ret = dht_lock_order_requests(local->lock[0].layout.my_layout.locks,
+ local->lock[0].layout.my_layout.lk_count);
+ if (ret < 0)
+ goto out;
+
+ ret = 0;
+out:
+ return ret;
+}
+
+static void
+dht_inodelk_done(call_frame_t *lock_frame)
+{
+ fop_inodelk_cbk_t inodelk_cbk = NULL;
+ call_frame_t *main_frame = NULL;
+ dht_local_t *local = NULL;
+
+ local = lock_frame->local;
+ main_frame = local->main_frame;
+
+ local->lock[0].layout.my_layout.locks = NULL;
+ local->lock[0].layout.my_layout.lk_count = 0;
+
+ inodelk_cbk = local->lock[0].layout.my_layout.inodelk_cbk;
+ local->lock[0].layout.my_layout.inodelk_cbk = NULL;
+
+ inodelk_cbk(main_frame, NULL, main_frame->this,
+ local->lock[0].layout.my_layout.op_ret,
+ local->lock[0].layout.my_layout.op_errno, NULL);
+
+ dht_lock_stack_destroy(lock_frame, DHT_INODELK);
+ return;
+}
+
+static int32_t
+dht_unlock_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int lk_index = 0, call_cnt = 0;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ lk_index = (long)cookie;
+
+ local = frame->local;
+ if (op_ret < 0) {
+ uuid_utoa_r(local->lock[0].layout.my_layout.locks[lk_index]->loc.gfid,
+ gfid);
+
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_UNLOCKING_FAILED,
+ "name=%s",
+ local->lock[0].layout.my_layout.locks[lk_index]->xl->name,
+ "gfid=%s", gfid, NULL);
+ } else {
+ local->lock[0].layout.my_layout.locks[lk_index]->locked = 0;
+ }
+
+ call_cnt = dht_frame_return(frame);
+ if (is_last_call(call_cnt)) {
+ dht_inodelk_done(frame);
+ }
+
+ return 0;
+}
+
+static int32_t
+dht_unlock_inodelk_done(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ local = frame->local;
+ gf_uuid_unparse(local->lock[0].layout.my_layout.locks[0]->loc.inode->gfid,
+ gfid);
+
+ if (op_ret < 0) {
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno,
+ DHT_MSG_UNLOCK_GFID_FAILED, "DHT_LAYOUT_HEAL_DOMAIN gfid=%s",
+ gfid, NULL);
+ }
+
+ DHT_STACK_DESTROY(frame);
+ return 0;
+}
+
+int32_t
+dht_unlock_inodelk(call_frame_t *frame, dht_lock_t **lk_array, int lk_count,
+ fop_inodelk_cbk_t inodelk_cbk)
+{
+ dht_local_t *local = NULL;
+ struct gf_flock flock = {
+ 0,
+ };
+ int ret = -1, i = 0;
+ call_frame_t *lock_frame = NULL;
+ int call_cnt = 0;
+
+ GF_VALIDATE_OR_GOTO("dht-locks", frame, done);
+ GF_VALIDATE_OR_GOTO(frame->this->name, lk_array, done);
+ GF_VALIDATE_OR_GOTO(frame->this->name, inodelk_cbk, done);
+
+ call_cnt = dht_lock_count(lk_array, lk_count);
+ if (call_cnt == 0) {
+ ret = 0;
+ goto done;
+ }
+
+ lock_frame = dht_lock_frame(frame);
+ if (lock_frame == NULL) {
+ gf_smsg(frame->this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_ALLOC_FRAME_FAILED_NOT_UNLOCKING_FOLLOWING_ENTRYLKS,
+ NULL);
+
+ dht_log_lk_array(frame->this->name, GF_LOG_WARNING, lk_array, lk_count);
+ goto done;
+ }
+
+ ret = dht_local_inodelk_init(lock_frame, lk_array, lk_count, inodelk_cbk);
+ if (ret < 0) {
+ gf_smsg(frame->this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_LOCAL_LOCKS_STORE_FAILED_UNLOCKING_FOLLOWING_ENTRYLK,
+ NULL);
+
+ dht_log_lk_array(frame->this->name, GF_LOG_WARNING, lk_array, lk_count);
+
+ goto done;
+ }
+
+ local = lock_frame->local;
+ local->main_frame = frame;
+ local->call_cnt = call_cnt;
+
+ flock.l_type = F_UNLCK;
+
+ for (i = 0; i < local->lock[0].layout.my_layout.lk_count; i++) {
+ if (!local->lock[0].layout.my_layout.locks[i]->locked)
+ continue;
+
+ lock_frame->root
+ ->lk_owner = local->lock[0].layout.my_layout.locks[i]->lk_owner;
+ STACK_WIND_COOKIE(
+ lock_frame, dht_unlock_inodelk_cbk, (void *)(long)i,
+ local->lock[0].layout.my_layout.locks[i]->xl,
+ local->lock[0].layout.my_layout.locks[i]->xl->fops->inodelk,
+ local->lock[0].layout.my_layout.locks[i]->domain,
+ &local->lock[0].layout.my_layout.locks[i]->loc, F_SETLK, &flock,
+ NULL);
+ if (!--call_cnt)
+ break;
+ }
+
+ return 0;
+
+done:
+ if (lock_frame)
+ dht_lock_stack_destroy(lock_frame, DHT_INODELK);
+
+ /* no locks acquired, invoke inodelk_cbk */
+ if (ret == 0)
+ inodelk_cbk(frame, NULL, frame->this, 0, 0, NULL);
+
+ return ret;
+}
+
+int32_t
+dht_unlock_inodelk_wrapper(call_frame_t *frame, dht_ilock_wrap_t *inodelk)
+{
+ dht_local_t *local = NULL, *lock_local = NULL;
+ call_frame_t *lock_frame = NULL;
+ char pgfid[GF_UUID_BUF_SIZE] = {0};
+ int ret = 0;
+
+ local = frame->local;
+
+ if (!inodelk || !inodelk->locks)
+ goto out;
+
+ gf_uuid_unparse(local->loc.parent->gfid, pgfid);
+
+ lock_frame = copy_frame(frame);
+ if (lock_frame == NULL) {
+ gf_smsg(frame->this->name, GF_LOG_WARNING, ENOMEM,
+ DHT_MSG_COPY_FRAME_FAILED, "pgfid=%s", pgfid, "name=%s",
+ local->loc.name, "path=%s", local->loc.path, NULL);
+ goto done;
+ }
+
+ lock_local = dht_local_init(lock_frame, NULL, NULL, 0);
+ if (lock_local == NULL) {
+ gf_smsg(frame->this->name, GF_LOG_WARNING, ENOMEM,
+ DHT_MSG_CREATE_FAILED, "local", "gfid=%s", pgfid, "name=%s",
+ local->loc.name, "path=%s", local->loc.path, NULL);
+ goto done;
+ }
+
+ lock_frame->local = lock_local;
+
+ lock_local->lock[0].layout.my_layout.locks = inodelk->locks;
+ lock_local->lock[0].layout.my_layout.lk_count = inodelk->lk_count;
+ inodelk->locks = NULL;
+ inodelk->lk_count = 0;
+
+ ret = dht_unlock_inodelk(
+ lock_frame, lock_local->lock[0].layout.my_layout.locks,
+ lock_local->lock[0].layout.my_layout.lk_count, dht_unlock_inodelk_done);
+
+ if (ret)
+ goto done;
+
+ lock_frame = NULL;
+
+done:
+ if (lock_frame != NULL) {
+ DHT_STACK_DESTROY(lock_frame);
+ }
+out:
+ return 0;
+}
+
+static int
+dht_inodelk_cleanup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ dht_inodelk_done(frame);
+ return 0;
+}
+
+static void
+dht_inodelk_cleanup(call_frame_t *lock_frame)
+{
+ dht_lock_t **lk_array = NULL;
+ int lk_count = 0, lk_acquired = 0;
+ dht_local_t *local = NULL;
+
+ local = lock_frame->local;
+
+ lk_array = local->lock[0].layout.my_layout.locks;
+ lk_count = local->lock[0].layout.my_layout.lk_count;
+
+ lk_acquired = dht_lock_count(lk_array, lk_count);
+ if (lk_acquired != 0) {
+ dht_unlock_inodelk(lock_frame, lk_array, lk_count,
+ dht_inodelk_cleanup_cbk);
+ } else {
+ dht_inodelk_done(lock_frame);
+ }
+
+ return;
+}
+
+static int32_t
+dht_nonblocking_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int lk_index = 0, call_cnt = 0;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ local = frame->local;
+ lk_index = (long)cookie;
+
+ if (op_ret == -1) {
+ local->lock[0].layout.my_layout.op_ret = -1;
+ local->lock[0].layout.my_layout.op_errno = op_errno;
+
+ if (local && local->lock[0].layout.my_layout.locks[lk_index]) {
+ uuid_utoa_r(local->lock[0]
+ .layout.my_layout.locks[lk_index]
+ ->loc.inode->gfid,
+ gfid);
+
+ gf_msg_debug(
+ this->name, op_errno,
+ "inodelk failed on gfid: %s "
+ "subvolume: %s",
+ gfid,
+ local->lock[0].layout.my_layout.locks[lk_index]->xl->name);
+ }
+
+ goto out;
+ }
+
+ local->lock[0].layout.my_layout.locks[lk_index]->locked = _gf_true;
+
+out:
+ call_cnt = dht_frame_return(frame);
+ if (is_last_call(call_cnt)) {
+ if (local->lock[0].layout.my_layout.op_ret < 0) {
+ dht_inodelk_cleanup(frame);
+ return 0;
+ }
+
+ dht_inodelk_done(frame);
+ }
+
+ return 0;
+}
+
+int
+dht_nonblocking_inodelk(call_frame_t *frame, dht_lock_t **lk_array,
+ int lk_count, fop_inodelk_cbk_t inodelk_cbk)
+{
+ struct gf_flock flock = {
+ 0,
+ };
+ int i = 0, ret = 0;
+ dht_local_t *local = NULL;
+ call_frame_t *lock_frame = NULL;
+
+ GF_VALIDATE_OR_GOTO("dht-locks", frame, out);
+ GF_VALIDATE_OR_GOTO(frame->this->name, lk_array, out);
+ GF_VALIDATE_OR_GOTO(frame->this->name, inodelk_cbk, out);
+
+ lock_frame = dht_lock_frame(frame);
+ if (lock_frame == NULL)
+ goto out;
+
+ ret = dht_local_inodelk_init(lock_frame, lk_array, lk_count, inodelk_cbk);
+ if (ret < 0) {
+ goto out;
+ }
+
+ dht_set_lkowner(lk_array, lk_count, &lock_frame->root->lk_owner);
+
+ local = lock_frame->local;
+ local->main_frame = frame;
+
+ local->call_cnt = lk_count;
+
+ for (i = 0; i < lk_count; i++) {
+ flock.l_type = local->lock[0].layout.my_layout.locks[i]->type;
+
+ STACK_WIND_COOKIE(
+ lock_frame, dht_nonblocking_inodelk_cbk, (void *)(long)i,
+ local->lock[0].layout.my_layout.locks[i]->xl,
+ local->lock[0].layout.my_layout.locks[i]->xl->fops->inodelk,
+ local->lock[0].layout.my_layout.locks[i]->domain,
+ &local->lock[0].layout.my_layout.locks[i]->loc, F_SETLK, &flock,
+ NULL);
+ }
+
+ return 0;
+
+out:
+ if (lock_frame)
+ dht_lock_stack_destroy(lock_frame, DHT_INODELK);
+
+ return -1;
+}
+
+static int32_t
+dht_blocking_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ int lk_index = 0;
+ int i = 0;
+ dht_local_t *local = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {
+ 0,
+ };
+ dht_reaction_type_t reaction = 0;
+
+ lk_index = (long)cookie;
+
+ local = frame->local;
+ if (op_ret == 0) {
+ local->lock[0].layout.my_layout.locks[lk_index]->locked = _gf_true;
+ } else {
+ switch (op_errno) {
+ case ESTALE:
+ case ENOENT:
+ reaction = local->lock[0]
+ .layout.my_layout.locks[lk_index]
+ ->do_on_failure;
+ if ((reaction != IGNORE_ENOENT_ESTALE) &&
+ (reaction != IGNORE_ENOENT_ESTALE_EIO)) {
+ gf_uuid_unparse(local->lock[0]
+ .layout.my_layout.locks[lk_index]
+ ->loc.gfid,
+ gfid);
+ local->lock[0].layout.my_layout.op_ret = -1;
+ local->lock[0].layout.my_layout.op_errno = op_errno;
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_INODELK_FAILED, "subvol=%s",
+ local->lock[0]
+ .layout.my_layout.locks[lk_index]
+ ->xl->name,
+ "gfid=%s", gfid, NULL);
+ goto cleanup;
+ }
+ break;
+ case EIO:
+ reaction = local->lock[0]
+ .layout.my_layout.locks[lk_index]
+ ->do_on_failure;
+ if (reaction != IGNORE_ENOENT_ESTALE_EIO) {
+ gf_uuid_unparse(local->lock[0]
+ .layout.my_layout.locks[lk_index]
+ ->loc.gfid,
+ gfid);
+ local->lock[0].layout.my_layout.op_ret = -1;
+ local->lock[0].layout.my_layout.op_errno = op_errno;
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_INODELK_FAILED, "subvol=%s",
+ local->lock[0]
+ .layout.my_layout.locks[lk_index]
+ ->xl->name,
+ "gfid=%s", gfid, NULL);
+ goto cleanup;
+ }
+ break;
+
+ default:
+ gf_uuid_unparse(
+ local->lock[0].layout.my_layout.locks[lk_index]->loc.gfid,
+ gfid);
+ local->lock[0].layout.my_layout.op_ret = -1;
+ local->lock[0].layout.my_layout.op_errno = op_errno;
+ gf_smsg(
+ this->name, GF_LOG_ERROR, op_errno, DHT_MSG_INODELK_FAILED,
+ "subvol=%s",
+ local->lock[0].layout.my_layout.locks[lk_index]->xl->name,
+ "gfid=%s", gfid, NULL);
+ goto cleanup;
+ }
+ }
+
+ if (lk_index == (local->lock[0].layout.my_layout.lk_count - 1)) {
+ for (i = 0; (i < local->lock[0].layout.my_layout.lk_count) &&
+ (!local->lock[0].layout.my_layout.locks[i]->locked);
+ i++)
+ ;
+
+ if (i == local->lock[0].layout.my_layout.lk_count) {
+ local->lock[0].layout.my_layout.op_ret = -1;
+ local->lock[0].layout.my_layout.op_errno = op_errno;
+ }
+
+ dht_inodelk_done(frame);
+ } else {
+ dht_blocking_inodelk_rec(frame, ++lk_index);
+ }
+
+ return 0;
+
+cleanup:
+ dht_inodelk_cleanup(frame);
+
+ return 0;
+}
+
+void
+dht_blocking_inodelk_rec(call_frame_t *frame, int i)
+{
+ dht_local_t *local = NULL;
+ struct gf_flock flock = {
+ 0,
+ };
+
+ local = frame->local;
+
+ flock.l_type = local->lock[0].layout.my_layout.locks[i]->type;
+
+ STACK_WIND_COOKIE(
+ frame, dht_blocking_inodelk_cbk, (void *)(long)i,
+ local->lock[0].layout.my_layout.locks[i]->xl,
+ local->lock[0].layout.my_layout.locks[i]->xl->fops->inodelk,
+ local->lock[0].layout.my_layout.locks[i]->domain,
+ &local->lock[0].layout.my_layout.locks[i]->loc, F_SETLKW, &flock, NULL);
+
+ return;
+}
+
+int
+dht_blocking_inodelk(call_frame_t *frame, dht_lock_t **lk_array, int lk_count,
+ fop_inodelk_cbk_t inodelk_cbk)
+{
+ int ret = -1;
+ call_frame_t *lock_frame = NULL;
+ dht_local_t *local = NULL;
+ dht_local_t *tmp_local = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {
+ 0,
+ };
+
+ GF_VALIDATE_OR_GOTO("dht-locks", frame, out);
+ GF_VALIDATE_OR_GOTO(frame->this->name, lk_array, out);
+ GF_VALIDATE_OR_GOTO(frame->this->name, inodelk_cbk, out);
+
+ tmp_local = frame->local;
+
+ lock_frame = dht_lock_frame(frame);
+ if (lock_frame == NULL) {
+ gf_uuid_unparse(tmp_local->loc.gfid, gfid);
+ gf_smsg("dht", GF_LOG_ERROR, ENOMEM, DHT_MSG_LOCK_FRAME_FAILED,
+ "gfid=%s", gfid, "path=%s", tmp_local->loc.path, NULL);
+ goto out;
+ }
+
+ ret = dht_local_inodelk_init(lock_frame, lk_array, lk_count, inodelk_cbk);
+ if (ret < 0) {
+ gf_uuid_unparse(tmp_local->loc.gfid, gfid);
+ gf_smsg("dht", GF_LOG_ERROR, ENOMEM, DHT_MSG_LOCAL_LOCK_INIT_FAILED,
+ "gfid=%s", gfid, "path=%s", tmp_local->loc.path, NULL);
+ goto out;
+ }
+
+ dht_set_lkowner(lk_array, lk_count, &lock_frame->root->lk_owner);
+
+ local = lock_frame->local;
+ local->main_frame = frame;
+
+ dht_blocking_inodelk_rec(lock_frame, 0);
+
+ return 0;
+out:
+ if (lock_frame)
+ dht_lock_stack_destroy(lock_frame, DHT_INODELK);
+
+ return -1;
+}
+
+void
+dht_unlock_namespace(call_frame_t *frame, dht_dir_transaction_t *lock)
+{
+ GF_VALIDATE_OR_GOTO("dht-locks", frame, out);
+ GF_VALIDATE_OR_GOTO(frame->this->name, lock, out);
+
+ dht_unlock_entrylk_wrapper(frame, &lock->ns.directory_ns);
+ dht_unlock_inodelk_wrapper(frame, &lock->ns.parent_layout);
+
+out:
+ return;
+}
+
+static int32_t
+dht_protect_namespace_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+
+ local = frame->local;
+ if (op_ret != 0)
+ dht_unlock_inodelk_wrapper(frame, &local->current->ns.parent_layout);
+
+ local->current->ns.ns_cbk(frame, cookie, this, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int32_t
+dht_blocking_entrylk_after_inodelk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int ret = -1;
+ loc_t *loc = NULL;
+ dht_lock_t **lk_array = NULL;
+ char pgfid[GF_UUID_BUF_SIZE] = {0};
+ int count = 0;
+ dht_elock_wrap_t *entrylk = NULL;
+
+ local = frame->local;
+ entrylk = &local->current->ns.directory_ns;
+
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ goto err;
+ }
+
+ loc = &entrylk->locks[0]->loc;
+ gf_uuid_unparse(loc->gfid, pgfid);
+
+ local->op_ret = 0;
+ lk_array = entrylk->locks;
+ count = entrylk->lk_count;
+
+ ret = dht_blocking_entrylk(frame, lk_array, count,
+ dht_protect_namespace_cbk);
+
+ if (ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_ENTRYLK_FAILED_AFT_INODELK, "fop=%s",
+ gf_fop_list[local->fop], "pgfid=%s", pgfid, "basename=%s",
+ entrylk->locks[0]->basename, NULL);
+ goto err;
+ }
+
+ return 0;
+
+err:
+ if (lk_array != NULL) {
+ dht_lock_array_free(lk_array, count);
+ GF_FREE(lk_array);
+ entrylk->locks = NULL;
+ entrylk->lk_count = 0;
+ }
+
+ /* Unlock inodelk. No harm calling unlock twice */
+ dht_unlock_inodelk_wrapper(frame, &local->current->ns.parent_layout);
+ /* Call ns_cbk. It will take care of unwinding */
+ local->current->ns.ns_cbk(frame, NULL, this, local->op_ret, local->op_errno,
+ NULL);
+ return 0;
+}
+
+/* Given the loc and the subvol, this routine takes the inodelk on
+ * the parent inode and entrylk on (parent, loc->name). This routine
+ * is specific as it supports only one subvol on which it takes inodelk
+ * and then entrylk serially.
+ */
+int
+dht_protect_namespace(call_frame_t *frame, loc_t *loc, xlator_t *subvol,
+ struct dht_namespace *ns, fop_entrylk_cbk_t ns_cbk)
+{
+ dht_ilock_wrap_t *inodelk = NULL;
+ dht_elock_wrap_t *entrylk = NULL;
+ dht_lock_t **lk_array = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *this = NULL;
+ loc_t parent = {
+ 0,
+ };
+ int ret = -1;
+ char pgfid[GF_UUID_BUF_SIZE] = {0};
+ int32_t op_errno = 0;
+ int count = 1;
+
+ GF_VALIDATE_OR_GOTO("dht-locks", frame, out);
+ GF_VALIDATE_OR_GOTO(frame->this->name, loc, out);
+ GF_VALIDATE_OR_GOTO(frame->this->name, loc->parent, out);
+ GF_VALIDATE_OR_GOTO(frame->this->name, subvol, out);
+
+ local = frame->local;
+ this = frame->this;
+
+ inodelk = &ns->parent_layout;
+ entrylk = &ns->directory_ns;
+
+ /* Initialize entrylk_cbk and parent loc */
+ ns->ns_cbk = ns_cbk;
+
+ ret = dht_build_parent_loc(this, &parent, loc, &op_errno);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno, DHT_MSG_LOC_FAILED,
+ "gfid=%s", loc->gfid, "name=%s", loc->name, "path=%s",
+ loc->path, NULL);
+ goto out;
+ }
+ gf_uuid_unparse(parent.gfid, pgfid);
+
+ /* Alloc inodelk */
+ inodelk->locks = GF_CALLOC(count, sizeof(*lk_array), gf_common_mt_pointer);
+ if (inodelk->locks == NULL) {
+ local->op_errno = ENOMEM;
+ gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_CALLOC_FAILED, "fop=%s", gf_fop_list[local->fop],
+ "pgfid=%s", pgfid, "name=%s", loc->name, "path=%s", loc->path,
+ NULL);
+ goto out;
+ }
+
+ inodelk->locks[0] = dht_lock_new(this, subvol, &parent, F_RDLCK,
+ DHT_LAYOUT_HEAL_DOMAIN, NULL,
+ FAIL_ON_ANY_ERROR);
+ if (inodelk->locks[0] == NULL) {
+ local->op_errno = ENOMEM;
+ gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_LOCK_ALLOC_FAILED, "inodelk-fop=%s",
+ gf_fop_list[local->fop], "pgfid=%s", pgfid, "name=%s",
+ loc->name, "path=%s", loc->path, NULL);
+ goto err;
+ }
+ inodelk->lk_count = count;
+
+ /* Allock entrylk */
+ entrylk->locks = GF_CALLOC(count, sizeof(*lk_array), gf_common_mt_pointer);
+ if (entrylk->locks == NULL) {
+ local->op_errno = ENOMEM;
+ gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_CALLOC_FAILED, "entrylk-fop=%s",
+ gf_fop_list[local->fop], "pgfid=%s", pgfid, "name=%s",
+ loc->name, "path=%s", loc->path, NULL);
+
+ goto err;
+ }
+
+ entrylk->locks[0] = dht_lock_new(this, subvol, &parent, F_WRLCK,
+ DHT_ENTRY_SYNC_DOMAIN, loc->name,
+ FAIL_ON_ANY_ERROR);
+ if (entrylk->locks[0] == NULL) {
+ local->op_errno = ENOMEM;
+ gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_LOCK_ALLOC_FAILED, "entrylk-fop=%s",
+ gf_fop_list[local->fop], "pgfid=%s", pgfid, "name=%s",
+ loc->name, "path=%s", loc->path, NULL);
+
+ goto err;
+ }
+ entrylk->lk_count = count;
+
+ /* Take read inodelk on parent. If it is successful, take write entrylk
+ * on name in cbk.
+ */
+ lk_array = inodelk->locks;
+ ret = dht_blocking_inodelk(frame, lk_array, count,
+ dht_blocking_entrylk_after_inodelk);
+ if (ret < 0) {
+ local->op_errno = EIO;
+ gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_BLOCK_INODELK_FAILED, "fop=%s", gf_fop_list[local->fop],
+ "pgfid=%s", pgfid, "name=%s", loc->name, "path=%s", loc->path,
+ NULL);
+
+ goto err;
+ }
+
+ loc_wipe(&parent);
+
+ return 0;
+err:
+ if (entrylk->locks != NULL) {
+ dht_lock_array_free(entrylk->locks, count);
+ GF_FREE(entrylk->locks);
+ entrylk->locks = NULL;
+ entrylk->lk_count = 0;
+ }
+
+ if (inodelk->locks != NULL) {
+ dht_lock_array_free(inodelk->locks, count);
+ GF_FREE(inodelk->locks);
+ inodelk->locks = NULL;
+ inodelk->lk_count = 0;
+ }
+
+ loc_wipe(&parent);
+out:
+ return -1;
+}
diff --git a/xlators/cluster/dht/src/dht-lock.h b/xlators/cluster/dht/src/dht-lock.h
new file mode 100644
index 00000000000..6485c03fb6e
--- /dev/null
+++ b/xlators/cluster/dht/src/dht-lock.h
@@ -0,0 +1,91 @@
+/*
+ Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _DHT_LOCK_H
+#define _DHT_LOCK_H
+
+#include "dht-common.h"
+
+void
+dht_lock_array_free(dht_lock_t **lk_array, int count);
+
+int32_t
+dht_lock_count(dht_lock_t **lk_array, int lk_count);
+
+dht_lock_t *
+dht_lock_new(xlator_t *this, xlator_t *xl, loc_t *loc, short type,
+ const char *domain, const char *basename,
+ dht_reaction_type_t do_on_failure);
+
+int32_t
+dht_unlock_entrylk_wrapper(call_frame_t *, dht_elock_wrap_t *);
+
+void
+dht_blocking_entrylk_rec(call_frame_t *frame, int i);
+
+int
+dht_blocking_entrylk(call_frame_t *frame, dht_lock_t **lk_array, int lk_count,
+ fop_inodelk_cbk_t entrylk_cbk);
+
+int32_t
+dht_unlock_inodelk(call_frame_t *frame, dht_lock_t **lk_array, int lk_count,
+ fop_inodelk_cbk_t inodelk_cbk);
+
+int32_t
+dht_unlock_inodelk_wrapper(call_frame_t *, dht_ilock_wrap_t *);
+
+/* Acquire non-blocking inodelk on a list of xlators.
+ *
+ * @lk_array: array of lock requests lock on.
+ *
+ * @lk_count: number of locks in @lk_array
+ *
+ * @inodelk_cbk: will be called after inodelk replies are received
+ *
+ * @retval: -1 if stack_winding inodelk fails. 0 otherwise.
+ * inodelk_cbk is called with appropriate error on errors.
+ * On failure to acquire lock on all members of list, successful
+ * locks are unlocked before invoking cbk.
+ */
+
+int
+dht_nonblocking_inodelk(call_frame_t *frame, dht_lock_t **lk_array,
+ int lk_count, fop_inodelk_cbk_t inodelk_cbk);
+
+void
+dht_blocking_inodelk_rec(call_frame_t *frame, int i);
+
+/* same as dht_nonblocking_inodelk, but issues sequential blocking locks on
+ * @lk_array directly. locks are issued on some order which remains same
+ * for a list of xlators (irrespective of order of xlators within list).
+ */
+
+int
+dht_blocking_inodelk(call_frame_t *frame, dht_lock_t **lk_array, int lk_count,
+ fop_inodelk_cbk_t inodelk_cbk);
+
+int32_t
+dht_blocking_entrylk_after_inodelk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+int32_t
+dht_blocking_entrylk_after_inodelk_rename(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+void
+dht_unlock_namespace(call_frame_t *, dht_dir_transaction_t *);
+
+int
+dht_protect_namespace(call_frame_t *frame, loc_t *loc, xlator_t *subvol,
+ struct dht_namespace *ns, fop_entrylk_cbk_t ns_cbk);
+
+#endif /* _DHT_LOCK_H */
diff --git a/xlators/cluster/dht/src/dht-mem-types.h b/xlators/cluster/dht/src/dht-mem-types.h
index e893eb48fd8..e3c4471334a 100644
--- a/xlators/cluster/dht/src/dht-mem-types.h
+++ b/xlators/cluster/dht/src/dht-mem-types.h
@@ -8,28 +8,31 @@
cases as published by the Free Software Foundation.
*/
-
#ifndef __DHT_MEM_TYPES_H__
#define __DHT_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_dht_mem_types_ {
- gf_dht_mt_dht_du_t = gf_common_mt_end + 1,
- gf_dht_mt_dht_conf_t,
- gf_dht_mt_char,
- gf_dht_mt_int32_t,
- gf_dht_mt_xlator_t,
- gf_dht_mt_dht_layout_t,
- gf_switch_mt_dht_conf_t,
- gf_switch_mt_dht_du_t,
- gf_switch_mt_switch_sched_array,
- gf_switch_mt_switch_struct,
- gf_dht_mt_subvol_time,
- gf_dht_mt_loc_t,
- gf_defrag_info_mt,
- gf_dht_mt_inode_ctx_t,
- gf_dht_mt_ctx_stat_time_t,
- gf_dht_mt_end
+ gf_dht_mt_dht_du_t = gf_common_mt_end + 1,
+ gf_dht_mt_dht_conf_t,
+ gf_dht_mt_char,
+ gf_dht_mt_int32_t,
+ gf_dht_mt_xlator_t,
+ gf_dht_mt_dht_layout_t,
+ gf_switch_mt_switch_sched_array,
+ gf_switch_mt_switch_struct,
+ gf_dht_mt_subvol_time,
+ gf_dht_mt_loc_t,
+ gf_defrag_info_mt,
+ gf_dht_mt_inode_ctx_t,
+ gf_dht_mt_dirent_t,
+ gf_dht_mt_container_t,
+ gf_dht_mt_octx_t,
+ gf_dht_mt_miginfo_t,
+ gf_dht_mt_fd_ctx_t,
+ gf_dht_ret_cache_t,
+ gf_dht_nodeuuids_t,
+ gf_dht_mt_end
};
#endif
diff --git a/xlators/cluster/dht/src/dht-messages.h b/xlators/cluster/dht/src/dht-messages.h
index eb4c356e3c2..601f8dad78b 100644
--- a/xlators/cluster/dht/src/dht-messages.h
+++ b/xlators/cluster/dht/src/dht-messages.h
@@ -1,4 +1,4 @@
-/*Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+/*Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
This file is licensed to you under your choice of the GNU Lesser
@@ -10,456 +10,377 @@
#ifndef _DHT_MESSAGES_H_
#define _DHT_MESSAGES_H_
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glfs-message-id.h"
-
-/*! \file dht-messages.h
- * \brief DHT log-message IDs and their descriptions
- *
- */
-
-/* NOTE: Rules for message additions
- * 1) Each instance of a message is _better_ left with a unique message ID, even
- * if the message format is the same. Reasoning is that, if the message
- * format needs to change in one instance, the other instances are not
- * impacted or the new change does not change the ID of the instance being
- * modified.
- * 2) Addition of a message,
- * - Should increment the GLFS_NUM_MESSAGES
- * - Append to the list of messages defined, towards the end
- * - Retain macro naming as glfs_msg_X (for redability across developers)
- * NOTE: Rules for message format modifications
- * 3) Check acorss the code if the message ID macro in question is reused
- * anywhere. If reused then then the modifications should ensure correctness
- * everywhere, or needs a new message ID as (1) above was not adhered to. If
- * not used anywhere, proceed with the required modification.
- * NOTE: Rules for message deletion
- * 4) Check (3) and if used anywhere else, then cannot be deleted. If not used
- * anywhere, then can be deleted, but will leave a hole by design, as
- * addition rules specify modification to the end of the list and not filling
- * holes.
- */
-
-#define GLFS_DHT_BASE GLFS_MSGID_COMP_DHT
-#define GLFS_DHT_NUM_MESSAGES 36
-#define GLFS_MSGID_END (GLFS_DHT_BASE + GLFS_DHT_NUM_MESSAGES + 1)
-
-/* Messages with message IDs */
-#define glfs_msg_start_x GLFS_DHT_BASE, "Invalid: Start of messages"
-
-
-
-
-/*!
- * @messageid 109001
- * @diagnosis Cached subvolume could not be found for the specified
- * path
- * @recommendedaction None
- *
- */
-
-#define DHT_MSG_CACHED_SUBVOL_GET_FAILED (GLFS_DHT_BASE + 1)
-
-/*!
- * @messageid 109002
- * @diagnosis Linkfile creation failed
- * @recommendedaction None
- *
- */
-
-#define DHT_MSG_CREATE_LINK_FAILED (GLFS_DHT_BASE + 2)
-
-/*!
- * @messageid 109003
- * @diagnosis The value could not be set for the specified key in
- * the dictionary
- *
- * @recommendedaction None
- *
- */
-
-#define DHT_MSG_DICT_SET_FAILED (GLFS_DHT_BASE + 3)
-
-/*!
- * @messageid 109004
- * @diagnosis Directory attributes could not be healed
- * @recommendedaction None
- *
- */
-
-#define DHT_MSG_DIR_ATTR_HEAL_FAILED (GLFS_DHT_BASE + 4)
-
-/*!
- * @messageid 109005
- * @diagnosis Self-heal failed for the specified directory
- * @recommendedaction Ensure that all subvolumes are online
- * and reachable and perform a lookup operation
- * on the directory again.
- *
- */
-
-#define DHT_MSG_DIR_SELFHEAL_FAILED (GLFS_DHT_BASE + 5)
-
-/*!
- * @messageid 109006
- * @diagnosis The extended attributes could not be healed for
- * the specified directory on the specified subvolume
- *
- * @recommendedaction None
- *
- */
-
-#define DHT_MSG_DIR_SELFHEAL_XATTR_FAILED (GLFS_DHT_BASE + 6)
-
-/*!
- * @messageid 109007
- * @diagnosis A lookup operation found the file with the same path
- * on multiple subvolumes.
- * @recommendedaction
- * 1. Create backups of the file on other subvolumes.
- * 2. Inspect the content of the files to identify
- * and retain the most appropriate file.
- *
- */
-
-#define DHT_MSG_FILE_ON_MULT_SUBVOL (GLFS_DHT_BASE + 7)
-
-/*!
- * @messageid 109008
- * @diagnosis A path resolves to a file on one subvolume and a directory
- * on another
- * @recommendedaction
- * 1. Create a backup of the file with a different name
- * and delete the original file.
- * 2. In the newly created back up file, remove the "trusted.gfid"
- * extended attribute.
- * - Command: setfattr -x "trusted.gfid" \<path to the newly created backup file\>
- * 3. Perform a new lookup operation on both the new and old paths.
- * 4. From the mount point, inspect both the paths and retain the
- * relevant file or directory.
- *
- */
-
-#define DHT_MSG_FILE_TYPE_MISMATCH (GLFS_DHT_BASE + 8)
-
-/*!
- * @messageid 109009
- * @diagnosis The GFID of the file/directory is different on different subvolumes
- * @recommendedaction None
- *
- */
-
-#define DHT_MSG_GFID_MISMATCH (GLFS_DHT_BASE + 9)
-
-/*!
- * @messageid 109010
- * @diagnosis The GFID of the specified file/directory is NULL.
- * @recommendedaction None
- *
- */
-
-#define DHT_MSG_GFID_NULL (GLFS_DHT_BASE + 10)
-
-/*!
- * @messageid 109011
- * @diagnosis The hashed subvolume could not be found for the specified
- * file/directory
- * @recommendedaction None
- *
- */
-
-#define DHT_MSG_HASHED_SUBVOL_GET_FAILED (GLFS_DHT_BASE + 11)
-
-/*!
- * @messageid 109012
- * @diagnosis The Distributed Hash Table Translator could not be initiated as the
- * system is out of memory.
- * @recommendedaction None
- *
- */
-
-#define DHT_MSG_INIT_FAILED (GLFS_DHT_BASE + 12)
-
-/*!
- * @messageid 109013
- * @diagnosis Invalid DHT configuration in the volfile
- * @recommendedaction None
- *
- */
-
-#define DHT_MSG_INVALID_CONFIGURATION (GLFS_DHT_BASE + 13)
-
-/*!
- * @messageid 109014
- * @diagnosis Invalid disk layout
- * @recommendedaction None
- *
- */
-
-#define DHT_MSG_INVALID_DISK_LAYOUT (GLFS_DHT_BASE + 14)
-
-/*!
- * @messageid 109015
- * @diagnosis Invalid DHT configuration option.
- * @recommendedaction
- * 1. Reset the option with a valid value using the volume
- * set command.
- * 2. Restart the process that logged the message in the
- * log file.
- *
- */
-
-#define DHT_MSG_INVALID_OPTION (GLFS_DHT_BASE + 15)
-
-/*!
- * @messageid 109016
- * @diagnosis The fix layout operation failed
- * @recommendedaction None
- *
- */
-
-#define DHT_MSG_LAYOUT_FIX_FAILED (GLFS_DHT_BASE + 16)
-
-/*!
- * @messageid 109017
- * @diagnosis Layout merge failed
- * @recommendedaction None
- *
- */
-
-#define DHT_MSG_LAYOUT_MERGE_FAILED (GLFS_DHT_BASE + 17)
-
-/*!
- * @messageid 109018
- * @diagnosis The layout for the specified directory does not match
- that on the disk.
- * @recommendedaction None
- *
- */
-
-#define DHT_MSG_LAYOUT_MISMATCH (GLFS_DHT_BASE + 18)
-
-/*!
- * @messageid 109019
- * @diagnosis No layout is present for the specified file/directory
- * @recommendedaction None
- *
- */
-
-#define DHT_MSG_LAYOUT_NULL (GLFS_DHT_BASE + 19)
-
-/*!
- * @messageid 109020
- * @diagnosis Informational message: Migration of data from the cached
- * subvolume to the hashed subvolume is complete
- * @recommendedaction None
- *
- */
-
-#define DHT_MSG_MIGRATE_DATA_COMPLETE (GLFS_DHT_BASE + 20)
-
-/*!
- * @messageid 109021
- * @diagnosis Migration of data failed during the rebalance operation
- * \n Cause: Directories could not be read to identify the files for the
- * migration process.
- * @recommendedaction
- * The log message would indicate the reason for the failure and
- * the corrective action depends on the specific error that is
- * encountered. The error is one of the standard UNIX errors.
- *
- */
-
-#define DHT_MSG_MIGRATE_DATA_FAILED (GLFS_DHT_BASE + 21)
-
-/*!
- * @messageid 109022
- * @diagnosis Informational message: The file was migrated successfully during
- * the rebalance operation.
- * @recommendedaction None
- *
- */
-
-#define DHT_MSG_MIGRATE_FILE_COMPLETE (GLFS_DHT_BASE + 22)
-
-/*!
- * @messageid 109023
- * @diagnosis File migration failed during the rebalance operation
- * \n Cause: Rebalance moves data from the cached subvolume to
- * the hashed subvolume. Migrating a single file is a multi-step operation
- * which involves opening, reading, and writing the data and metadata.
- * Any failures in this multi-step operation can result in a file
- * migration failure.
- * @recommendedaction The log message would indicate the reason for the failure and the
- * corrective action depends on the specific error that is encountered.
- * The error is one of the standard UNIX errors.
- *
- */
-
-#define DHT_MSG_MIGRATE_FILE_FAILED (GLFS_DHT_BASE + 23)
-
-/*!
- * @messageid 109024
- * @diagnosis Out of memory
- * @recommendedaction None
- *
- */
-
-#define DHT_MSG_NO_MEMORY (GLFS_DHT_BASE + 24)
-
-/*!
- * @messageid 109025
- * @diagnosis The opendir() call failed on the specified directory
- * \n Cause: When a directory is renamed, the Distribute Hash
- * table translator checks whether the destination directory
- * is empty. This message indicates that the opendir() call
- * on the destination directory has failed.
- * @recommendedaction The log message would indicate the reason for the
- * failure and the corrective action depends on the specific
- * error that is encountered. The error is one of the standard
- * UNIX errors.
- *
- */
-
-#define DHT_MSG_OPENDIR_FAILED (GLFS_DHT_BASE + 25)
-
-/*!
- * @messageid 109026
- * @diagnosis The rebalance operation failed.
- * @recommendedaction Check the log file for details about the failure.
- * Possible causes:
- * - A subvolume is down: Restart the rebalance operation after
- * bringing up all subvolumes.
- *
- */
-
-#define DHT_MSG_REBALANCE_FAILED (GLFS_DHT_BASE + 26)
-
-/*!
- * @messageid 109027
- * @diagnosis Failed to start the rebalance process.
- * @recommendedaction Check the log file for details about the failure.
- *
- */
-
-#define DHT_MSG_REBALANCE_START_FAILED (GLFS_DHT_BASE + 27)
-
-/*!
- * @messageid 109028
- * @diagnosis Informational message that indicates the status of the
- * rebalance operation and details as to how many files were
- * migrated, skipped, failed etc
- * @recommendedaction None
- *
- */
-
-#define DHT_MSG_REBALANCE_STATUS (GLFS_DHT_BASE + 28)
-
-/*!
- * @messageid 109029
- * @diagnosis The rebalance operation was aborted by the user.
- * @recommendedaction None
- *
- */
-
-#define DHT_MSG_REBALANCE_STOPPED (GLFS_DHT_BASE + 29)
-
-/*!
- * @messageid 109030
- * @diagnosis The file or directory could not be renamed
- * @recommendedaction Ensure that all the subvolumes are
- * online and reachable and try renaming
- * the file or directory again.
- *
- */
-
-#define DHT_MSG_RENAME_FAILED (GLFS_DHT_BASE + 30)
-
-/*!
- * @messageid 109031
- * @diagnosis Attributes could not be set for the specified file or
- * directory.
- * @recommendedaction None
- *
- */
-
-#define DHT_MSG_SETATTR_FAILED (GLFS_DHT_BASE + 31)
-
-/*!
- * @messageid 109032
- * @diagnosis The specified subvolume is running out of file system inodes.
- If all subvolumes run out of inodes, then new files cannot be created.
- * @recommendedaction Consider adding more nodes to the cluster if all subvolumes
- * run out of inodes
- *
- */
-
-#define DHT_MSG_SUBVOL_INSUFF_INODES (GLFS_DHT_BASE + 32)
-
-/*!
- * @messageid 109033
- * @diagnosis The specified subvolume is running out of disk space. If all
- subvolumes run out of space, new files cannot be created.
- * @recommendedaction Consider adding more bricks to the cluster if all subvolumes
- * run out of disk space.
- *
- */
-
-#define DHT_MSG_SUBVOL_INSUFF_SPACE (GLFS_DHT_BASE + 33)
-
-/*!
- * @messageid 109034
- * @diagnosis Failed to unlink the specified file/directory
- * @recommendedaction The log message would indicate the reason
- for the failure and the corrective action depends on
- the specific error that is encountered.
- */
-
-#define DHT_MSG_UNLINK_FAILED (GLFS_DHT_BASE + 34)
-
-
-
-/*!
- * @messageid 109035
- * @diagnosis The layout information could not be set in the inode
- * @recommendedaction None
- *
- */
-
-#define DHT_MSG_LAYOUT_SET_FAILED (GLFS_DHT_BASE + 35)
-
-/*!
- * @messageid 109036
- * @diagnosis Informational message regarding layout range distribution
- * for a directory across subvolumes
- * @recommendedaction None
- */
-
-#define DHT_MSG_LOG_FIXED_LAYOUT (GLFS_DHT_BASE + 36)
-
-/*
- * @messageid 109037
- * @diagnosis Informational message regarding error in tier operation
- * @recommendedaction None
- */
-
-#define DHT_MSG_LOG_TIER_ERROR (GLFS_DHT_BASE + 37)
-
-/*
- * @messageid 109038
- * @diagnosis Informational message regarding tier operation
- * @recommendedaction None
- */
-
-#define DHT_MSG_LOG_TIER_STATUS (GLFS_DHT_BASE + 38)
-
-
-/*------------*/
-#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
-
+#include <glusterfs/glfs-message-id.h>
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(
+ DHT, DHT_MSG_CACHED_SUBVOL_GET_FAILED, DHT_MSG_CREATE_LINK_FAILED,
+ DHT_MSG_DICT_SET_FAILED, DHT_MSG_DIR_ATTR_HEAL_FAILED,
+ DHT_MSG_DIR_SELFHEAL_FAILED, DHT_MSG_DIR_SELFHEAL_XATTR_FAILED,
+ DHT_MSG_FILE_ON_MULT_SUBVOL, DHT_MSG_FILE_TYPE_MISMATCH,
+ DHT_MSG_GFID_MISMATCH, DHT_MSG_GFID_NULL, DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ DHT_MSG_INIT_FAILED, DHT_MSG_INVALID_CONFIGURATION,
+ DHT_MSG_INVALID_DISK_LAYOUT, DHT_MSG_INVALID_OPTION,
+ DHT_MSG_LAYOUT_FIX_FAILED, DHT_MSG_LAYOUT_MERGE_FAILED,
+ DHT_MSG_LAYOUT_MISMATCH, DHT_MSG_LAYOUT_NULL, DHT_MSG_MIGRATE_DATA_COMPLETE,
+ DHT_MSG_MIGRATE_DATA_FAILED, DHT_MSG_MIGRATE_FILE_COMPLETE,
+ DHT_MSG_MIGRATE_FILE_FAILED, DHT_MSG_NO_MEMORY, DHT_MSG_OPENDIR_FAILED,
+ DHT_MSG_REBALANCE_FAILED, DHT_MSG_REBALANCE_START_FAILED,
+ DHT_MSG_REBALANCE_STATUS, DHT_MSG_REBALANCE_STOPPED, DHT_MSG_RENAME_FAILED,
+ DHT_MSG_SETATTR_FAILED, DHT_MSG_SUBVOL_INSUFF_INODES,
+ DHT_MSG_SUBVOL_INSUFF_SPACE, DHT_MSG_UNLINK_FAILED,
+ DHT_MSG_LAYOUT_SET_FAILED, DHT_MSG_LOG_FIXED_LAYOUT,
+ DHT_MSG_GET_XATTR_FAILED, DHT_MSG_FILE_LOOKUP_FAILED,
+ DHT_MSG_OPEN_FD_FAILED, DHT_MSG_SET_INODE_CTX_FAILED,
+ DHT_MSG_UNLOCKING_FAILED, DHT_MSG_DISK_LAYOUT_NULL, DHT_MSG_SUBVOL_INFO,
+ DHT_MSG_CHUNK_SIZE_INFO, DHT_MSG_LAYOUT_FORM_FAILED, DHT_MSG_SUBVOL_ERROR,
+ DHT_MSG_LAYOUT_SORT_FAILED, DHT_MSG_REGEX_INFO, DHT_MSG_FOPEN_FAILED,
+ DHT_MSG_SET_HOSTNAME_FAILED, DHT_MSG_BRICK_ERROR, DHT_MSG_SYNCOP_FAILED,
+ DHT_MSG_MIGRATE_INFO, DHT_MSG_SOCKET_ERROR, DHT_MSG_CREATE_FD_FAILED,
+ DHT_MSG_READDIR_ERROR, DHT_MSG_CHILD_LOC_BUILD_FAILED,
+ DHT_MSG_SET_SWITCH_PATTERN_ERROR, DHT_MSG_COMPUTE_HASH_FAILED,
+ DHT_MSG_FIND_LAYOUT_ANOMALIES_ERROR, DHT_MSG_ANOMALIES_INFO,
+ DHT_MSG_LAYOUT_INFO, DHT_MSG_INODE_LK_ERROR, DHT_MSG_RENAME_INFO,
+ DHT_MSG_DATA_NULL, DHT_MSG_AGGREGATE_QUOTA_XATTR_FAILED,
+ DHT_MSG_UNLINK_LOOKUP_INFO, DHT_MSG_LINK_FILE_LOOKUP_INFO,
+ DHT_MSG_OPERATION_NOT_SUP, DHT_MSG_NOT_LINK_FILE_ERROR, DHT_MSG_CHILD_DOWN,
+ DHT_MSG_UUID_PARSE_ERROR, DHT_MSG_GET_DISK_INFO_ERROR,
+ DHT_MSG_INVALID_VALUE, DHT_MSG_SWITCH_PATTERN_INFO,
+ DHT_MSG_SUBVOL_OP_FAILED, DHT_MSG_LAYOUT_PRESET_FAILED,
+ DHT_MSG_INVALID_LINKFILE, DHT_MSG_FIX_LAYOUT_INFO,
+ DHT_MSG_GET_HOSTNAME_FAILED, DHT_MSG_WRITE_FAILED,
+ DHT_MSG_MIGRATE_HARDLINK_FILE_FAILED, DHT_MSG_FSYNC_FAILED,
+ DHT_MSG_SUBVOL_DECOMMISSION_INFO, DHT_MSG_BRICK_QUERY_FAILED,
+ DHT_MSG_SUBVOL_NO_LAYOUT_INFO, DHT_MSG_OPEN_FD_ON_DST_FAILED,
+ DHT_MSG_SUBVOL_NOT_FOUND, DHT_MSG_FILE_LOOKUP_ON_DST_FAILED,
+ DHT_MSG_DISK_LAYOUT_MISSING, DHT_MSG_DICT_GET_FAILED,
+ DHT_MSG_REVALIDATE_CBK_INFO, DHT_MSG_UPGRADE_BRICKS, DHT_MSG_LK_ARRAY_INFO,
+ DHT_MSG_RENAME_NOT_LOCAL, DHT_MSG_RECONFIGURE_INFO,
+ DHT_MSG_INIT_LOCAL_SUBVOL_FAILED, DHT_MSG_SYS_CALL_GET_TIME_FAILED,
+ DHT_MSG_NO_DISK_USAGE_STATUS, DHT_MSG_SUBVOL_DOWN_ERROR,
+ DHT_MSG_REBAL_THROTTLE_INFO, DHT_MSG_COMMIT_HASH_INFO,
+ DHT_MSG_REBAL_STRUCT_SET, DHT_MSG_HAS_MIGINFO, DHT_MSG_SETTLE_HASH_FAILED,
+ DHT_MSG_DEFRAG_PROCESS_DIR_FAILED, DHT_MSG_FD_CTX_SET_FAILED,
+ DHT_MSG_STALE_LOOKUP, DHT_MSG_PARENT_LAYOUT_CHANGED,
+ DHT_MSG_LOCK_MIGRATION_FAILED, DHT_MSG_LOCK_INODE_UNREF_FAILED,
+ DHT_MSG_ASPRINTF_FAILED, DHT_MSG_DIR_LOOKUP_FAILED, DHT_MSG_INODELK_FAILED,
+ DHT_MSG_LOCK_FRAME_FAILED, DHT_MSG_LOCAL_LOCK_INIT_FAILED,
+ DHT_MSG_ENTRYLK_ERROR, DHT_MSG_INODELK_ERROR, DHT_MSG_LOC_FAILED,
+ DHT_MSG_UNKNOWN_FOP, DHT_MSG_MIGRATE_FILE_SKIPPED,
+ DHT_MSG_DIR_XATTR_HEAL_FAILED, DHT_MSG_HASHED_SUBVOL_DOWN,
+ DHT_MSG_NON_HASHED_SUBVOL_DOWN, DHT_MSG_SYNCTASK_CREATE_FAILED,
+ DHT_MSG_DIR_HEAL_ABORT, DHT_MSG_MIGRATE_SKIP, DHT_MSG_FD_CREATE_FAILED,
+ DHT_MSG_DICT_NEW_FAILED, DHT_MSG_FAILED_TO_OPEN, DHT_MSG_CREATE_FAILED,
+ DHT_MSG_FILE_NOT_EXIST, DHT_MSG_CHOWN_FAILED, DHT_MSG_FALLOCATE_FAILED,
+ DHT_MSG_FTRUNCATE_FAILED, DHT_MSG_STATFS_FAILED, DHT_MSG_WRITE_CROSS,
+ DHT_MSG_NEW_TARGET_FOUND, DHT_MSG_INSUFF_MEMORY, DHT_MSG_SET_XATTR_FAILED,
+ DHT_MSG_SET_MODE_FAILED, DHT_MSG_FILE_EXISTS_IN_DEST,
+ DHT_MSG_SYMLINK_FAILED, DHT_MSG_LINKFILE_DEL_FAILED, DHT_MSG_MKNOD_FAILED,
+ DHT_MSG_MIGRATE_CLEANUP_FAILED, DHT_MSG_LOCK_MIGRATE,
+ DHT_MSG_PARENT_BUILD_FAILED, DHT_MSG_HASHED_SUBVOL_NOT_FOUND,
+ DHT_MSG_ACQUIRE_ENTRYLK_FAILED, DHT_MSG_CREATE_DST_FAILED,
+ DHT_MSG_MIGRATION_EXIT, DHT_MSG_CHANGED_DST, DHT_MSG_TRACE_FAILED,
+ DHT_MSG_WRITE_LOCK_FAILED, DHT_MSG_GETACTIVELK_FAILED, DHT_MSG_STAT_FAILED,
+ DHT_MSG_UNLINK_PERFORM_FAILED, DHT_MSG_CLANUP_SOURCE_FILE_FAILED,
+ DHT_MSG_UNLOCK_FILE_FAILED, DHT_MSG_REMOVE_XATTR_FAILED,
+ DHT_MSG_DATA_MIGRATE_ABORT, DHT_MSG_DEFRAG_NULL, DHT_MSG_PARENT_NULL,
+ DHT_MSG_GFID_NOT_PRESENT, DHT_MSG_CHILD_LOC_FAILED,
+ DHT_MSG_SET_LOOKUP_FAILED, DHT_MSG_DIR_REMOVED, DHT_MSG_FIX_NOT_COMP,
+ DHT_MSG_SUBVOL_DETER_FAILED, DHT_MSG_LOCAL_SUBVOL, DHT_MSG_NODE_UUID,
+ DHT_MSG_SIZE_FILE, DHT_MSG_GET_DATA_SIZE_FAILED,
+ DHT_MSG_PTHREAD_JOIN_FAILED, DHT_MSG_COUNTER_THREAD_CREATE_FAILED,
+ DHT_MSG_MIGRATION_INIT_QUEUE_FAILED, DHT_MSG_PAUSED_TIMEOUT, DHT_MSG_WOKE,
+ DHT_MSG_ABORT_REBALANCE, DHT_MSG_CREATE_TASK_REBAL_FAILED,
+ DHT_MSG_REBAL_ESTIMATE_NOT_AVAIL, DHT_MSG_ADD_CHOICES_ERROR,
+ DHT_MSG_GET_CHOICES_ERROR, DHT_MSG_PREPARE_STATUS_ERROR,
+ DHT_MSG_SET_CHOICE_FAILED, DHT_MSG_SET_HASHED_SUBVOL_FAILED,
+ DHT_MSG_XATTR_HEAL_NOT_POSS, DHT_MSG_LINKTO_FILE_FAILED,
+ DHT_MSG_STALE_LINKFILE_DELETE, DHT_MSG_NO_SUBVOL_FOR_LINKTO,
+ DHT_MSG_SUBVOL_RETURNED, DHT_MSG_UNKNOWN_LOCAL_XSEL, DHT_MSG_GET_XATTR_ERR,
+ DHT_MSG_ALLOC_OR_FILL_FAILED, DHT_MSG_GET_REAL_NAME_FAILED,
+ DHT_MSG_COPY_UUID_FAILED, DHT_MSG_MDS_DETER_FAILED,
+ DHT_MSG_CREATE_REBAL_FAILED, DHT_MSG_LINK_LAYOUT_FAILED,
+ DHT_MSG_NO_SUBVOL_IN_LAYOUT, DHT_MSG_MEM_ALLOC_FAILED,
+ DHT_MSG_SET_IN_PARAMS_DICT_FAILED, DHT_MSG_LOC_COPY_FAILED,
+ DHT_MSG_PARENT_LOC_FAILED, DHT_MSG_CREATE_LOCK_FAILED,
+ DHT_MSG_PREV_ATTEMPT_FAILED, DHT_MSG_REFRESH_ATTEMPT,
+ DHT_MSG_ACQUIRE_LOCK_FAILED, DHT_MSG_CREATE_STUB_FAILED,
+ DHT_MSG_WIND_LOCK_REQ_FAILED, DHT_MSG_REFRESH_FAILED,
+ DHT_MSG_CACHED_SUBVOL_ERROR, DHT_MSG_NO_LINK_SUBVOL, DHT_MSG_SET_KEY_FAILED,
+ DHT_MSG_REMOVE_LINKTO_FAILED, DHT_MSG_LAYOUT_DICT_SET_FAILED,
+ DHT_MSG_XATTR_DICT_NULL, DHT_MSG_DUMMY_ALLOC_FAILED, DHT_MSG_DICT_IS_NULL,
+ DHT_MSG_LINK_INODE_FAILED, DHT_MSG_SELFHEAL_FAILED, DHT_MSG_NO_MDS_SUBVOL,
+ DHT_MSG_LIST_XATTRS_FAILED, DHT_MSG_RESET_INTER_XATTR_FAILED,
+ DHT_MSG_MDS_DOWN_UNABLE_TO_SET, DHT_MSG_WIND_UNLOCK_FAILED,
+ DHT_MSG_COMMIT_HASH_FAILED, DHT_MSG_UNLOCK_GFID_FAILED,
+ DHT_MSG_UNLOCK_FOLLOW_ENTRYLK, DHT_MSG_COPY_FRAME_FAILED,
+ DHT_MSG_UNLOCK_FOLLOW_LOCKS, DHT_MSG_ENTRYLK_FAILED_AFT_INODELK,
+ DHT_MSG_CALLOC_FAILED, DHT_MSG_LOCK_ALLOC_FAILED,
+ DHT_MSG_BLOCK_INODELK_FAILED,
+ DHT_MSG_LOCAL_LOCKS_STORE_FAILED_UNLOCKING_FOLLOWING_ENTRYLK,
+ DHT_MSG_ALLOC_FRAME_FAILED_NOT_UNLOCKING_FOLLOWING_ENTRYLKS,
+ DHT_MSG_DST_NULL_SET_FAILED);
+
+#define DHT_MSG_FD_CTX_SET_FAILED_STR "Failed to set fd ctx"
+#define DHT_MSG_INVALID_VALUE_STR "Different dst found in the fd ctx"
+#define DHT_MSG_UNKNOWN_FOP_STR "Unknown FOP on file"
+#define DHT_MSG_OPEN_FD_ON_DST_FAILED_STR "Failed to open the fd on file"
+#define DHT_MSG_SYNCTASK_CREATE_FAILED_STR "Failed to create synctask"
+#define DHT_MSG_ASPRINTF_FAILED_STR \
+ "asprintf failed while fetching subvol from the id"
+#define DHT_MSG_HAS_MIGINFO_STR "Found miginfo in the inode ctx"
+#define DHT_MSG_FILE_LOOKUP_FAILED_STR "failed to lookup the file"
+#define DHT_MSG_INVALID_LINKFILE_STR \
+ "linkto target is different from cached-subvol. treating as destination " \
+ "subvol"
+#define DHT_MSG_GFID_MISMATCH_STR "gfid different on the target file"
+#define DHT_MSG_GET_XATTR_FAILED_STR "failed to get 'linkto' xattr"
+#define DHT_MSG_SET_INODE_CTX_FAILED_STR "failed to set inode-ctx target file"
+#define DHT_MSG_DIR_SELFHEAL_FAILED_STR "Healing of path failed"
+#define DHT_MSG_DIR_HEAL_ABORT_STR \
+ "Failed to get path from subvol. Aborting directory healing"
+#define DHT_MSG_DIR_XATTR_HEAL_FAILED_STR "xattr heal failed for directory"
+#define DHT_MSG_LOCK_INODE_UNREF_FAILED_STR \
+ "Found a NULL inode. Failed to unref the inode"
+#define DHT_MSG_DICT_SET_FAILED_STR "Failed to set dictionary value"
+#define DHT_MSG_NOT_LINK_FILE_ERROR_STR "got non-linkfile"
+#define DHT_MSG_CREATE_LINK_FAILED_STR "failed to initialize linkfile data"
+#define DHT_MSG_UNLINK_FAILED_STR "Unlinking linkfile on subvolume failed"
+#define DHT_MSG_MIGRATE_FILE_FAILED_STR "Migrate file failed"
+#define DHT_MSG_NO_MEMORY_STR "could not allocate memory for dict"
+#define DHT_MSG_SUBVOL_ERROR_STR "Failed to get linkto subvol"
+#define DHT_MSG_MIGRATE_HARDLINK_FILE_FAILED_STR "link failed on subvol"
+#define DHT_MSG_MIGRATE_FILE_SKIPPED_STR "Migration skipped"
+#define DHT_MSG_FD_CREATE_FAILED_STR "fd create failed"
+#define DHT_MSG_DICT_NEW_FAILED_STR "dict_new failed"
+#define DHT_MSG_FAILED_TO_OPEN_STR "failed to open"
+#define DHT_MSG_CREATE_FAILED_STR "failed to create"
+#define DHT_MSG_FILE_NOT_EXIST_STR "file does not exist"
+#define DHT_MSG_CHOWN_FAILED_STR "chown failed"
+#define DHT_MSG_FALLOCATE_FAILED_STR "fallocate failed"
+#define DHT_MSG_FTRUNCATE_FAILED_STR "ftruncate failed"
+#define DHT_MSG_STATFS_FAILED_STR "failed to get statfs"
+#define DHT_MSG_WRITE_CROSS_STR \
+ "write will cross min-fre-disk for file on subvol. looking for new subvol"
+#define DHT_MSG_SUBVOL_INSUFF_SPACE_STR \
+ "Could not find any subvol with space accommodating the file. Cosider " \
+ "adding bricks"
+#define DHT_MSG_NEW_TARGET_FOUND_STR "New target found for file"
+#define DHT_MSG_INSUFF_MEMORY_STR "insufficient memory"
+#define DHT_MSG_SET_XATTR_FAILED_STR "failed to set xattr"
+#define DHT_MSG_SET_MODE_FAILED_STR "failed to set mode"
+#define DHT_MSG_FILE_EXISTS_IN_DEST_STR "file exists in destination"
+#define DHT_MSG_LINKFILE_DEL_FAILED_STR "failed to delete the linkfile"
+#define DHT_MSG_SYMLINK_FAILED_STR "symlink failed"
+#define DHT_MSG_MKNOD_FAILED_STR "mknod failed"
+#define DHT_MSG_SETATTR_FAILED_STR "failed to perform setattr"
+#define DHT_MSG_MIGRATE_CLEANUP_FAILED_STR \
+ "Migrate file cleanup failed: failed to fstat file"
+#define DHT_MSG_LOCK_MIGRATE_STR "locks will be migrated for file"
+#define DHT_MSG_PARENT_BUILD_FAILED_STR \
+ "failed to build parent loc, which is needed to acquire entrylk to " \
+ "synchronize with renames on this path. Skipping migration"
+#define DHT_MSG_HASHED_SUBVOL_NOT_FOUND_STR \
+ "cannot find hashed subvol which is needed to synchronize with renames " \
+ "on this path. Skipping migration"
+#define DHT_MSG_ACQUIRE_ENTRYLK_FAILED_STR "failed to acquire entrylk on subvol"
+#define DHT_MSG_CREATE_DST_FAILED_STR "create dst failed for file"
+#define DHT_MSG_MIGRATION_EXIT_STR "Exiting migration"
+#define DHT_MSG_CHANGED_DST_STR "destination changed fo file"
+#define DHT_MSG_TRACE_FAILED_STR "Trace failed"
+#define DHT_MSG_WRITE_LOCK_FAILED_STR "write lock failed"
+#define DHT_MSG_GETACTIVELK_FAILED_STR "getactivelk failed for file"
+#define DHT_MSG_STAT_FAILED_STR "failed to do a stat"
+#define DHT_MSG_UNLINK_PERFORM_FAILED_STR "failed to perform unlink"
+#define DHT_MSG_MIGRATE_FILE_COMPLETE_STR "completed migration"
+#define DHT_MSG_CLANUP_SOURCE_FILE_FAILED_STR "failed to cleanup source file"
+#define DHT_MSG_UNLOCK_FILE_FAILED_STR "failed to unlock file"
+#define DHT_MSG_REMOVE_XATTR_FAILED_STR "remove xattr failed"
+#define DHT_MSG_SOCKET_ERROR_STR "Failed to unlink listener socket"
+#define DHT_MSG_HASHED_SUBVOL_GET_FAILED_STR "Failed to get hashed subvolume"
+#define DHT_MSG_CACHED_SUBVOL_GET_FAILED_STR "Failed to get cached subvolume"
+#define DHT_MSG_MIGRATE_DATA_FAILED_STR "migrate-data failed"
+#define DHT_MSG_DEFRAG_NULL_STR "defrag is NULL"
+#define DHT_MSG_DATA_MIGRATE_ABORT_STR \
+ "Readdirp failed. Aborting data migration for dict"
+#define DHT_MSG_LAYOUT_FIX_FAILED_STR "fix layout failed"
+#define DHT_MSG_PARENT_NULL_STR "parent is NULL"
+#define DHT_MSG_GFID_NOT_PRESENT_STR "gfid not present"
+#define DHT_MSG_CHILD_LOC_FAILED_STR "Child loc build failed"
+#define DHT_MSG_SET_LOOKUP_FAILED_STR "Failed to set lookup"
+#define DHT_MSG_DIR_LOOKUP_FAILED_STR "lookup failed"
+#define DHT_MSG_DIR_REMOVED_STR "Dir renamed or removed. Skipping"
+#define DHT_MSG_READDIR_ERROR_STR "readdir failed, Aborting fix-layout"
+#define DHT_MSG_SETTLE_HASH_FAILED_STR "Settle hash failed"
+#define DHT_MSG_DEFRAG_PROCESS_DIR_FAILED_STR "gf_defrag_process_dir failed"
+#define DHT_MSG_FIX_NOT_COMP_STR \
+ "Unable to retrieve fixlayout xattr. Assume background fix layout not " \
+ "complete"
+#define DHT_MSG_SUBVOL_DETER_FAILED_STR \
+ "local subvolume determination failed with error"
+#define DHT_MSG_LOCAL_SUBVOL_STR "local subvol"
+#define DHT_MSG_NODE_UUID_STR "node uuid"
+#define DHT_MSG_SIZE_FILE_STR "Total size files"
+#define DHT_MSG_GET_DATA_SIZE_FAILED_STR \
+ "Failed to get the total data size. Unable to estimate time to complete " \
+ "rebalance"
+#define DHT_MSG_PTHREAD_JOIN_FAILED_STR \
+ "file_counter_thread: pthread_join failed"
+#define DHT_MSG_COUNTER_THREAD_CREATE_FAILED_STR \
+ "Failed to create the file counter thread"
+#define DHT_MSG_MIGRATION_INIT_QUEUE_FAILED_STR \
+ "Failed to initialise migration queue"
+#define DHT_MSG_REBALANCE_STOPPED_STR "Received stop command on rebalance"
+#define DHT_MSG_PAUSED_TIMEOUT_STR "Request pause timer timeout"
+#define DHT_MSG_WOKE_STR "woken"
+#define DHT_MSG_ABORT_REBALANCE_STR "Aborting rebalance"
+#define DHT_MSG_REBALANCE_START_FAILED_STR \
+ "Failed to start rebalance: look up on / failed"
+#define DHT_MSG_CREATE_TASK_REBAL_FAILED_STR \
+ "Could not create task for rebalance"
+#define DHT_MSG_REBAL_ESTIMATE_NOT_AVAIL_STR \
+ "Rebalance estimates will not be available"
+#define DHT_MSG_REBALANCE_STATUS_STR "Rebalance status"
+#define DHT_MSG_DATA_NULL_STR "data value is NULL"
+#define DHT_MSG_ADD_CHOICES_ERROR_STR "Error to add choices in buffer"
+#define DHT_MSG_GET_CHOICES_ERROR_STR "Error to get choices"
+#define DHT_MSG_PREPARE_STATUS_ERROR_STR "Error to prepare status"
+#define DHT_MSG_SET_CHOICE_FAILED_STR "Failed to set full choice"
+#define DHT_MSG_AGGREGATE_QUOTA_XATTR_FAILED_STR \
+ "Failed to aggregate quota xattr"
+#define DHT_MSG_FILE_TYPE_MISMATCH_STR \
+ "path exists as a file on one subvolume and directory on another. Please " \
+ "fix it manually"
+#define DHT_MSG_LAYOUT_SET_FAILED_STR "failed to set layout for subvolume"
+#define DHT_MSG_LAYOUT_MERGE_FAILED_STR "failed to merge layouts for subvolume"
+#define DHT_MSG_SET_HASHED_SUBVOL_FAILED_STR "Failed to set hashed subvolume"
+#define DHT_MSG_XATTR_HEAL_NOT_POSS_STR \
+ "No gfid exists for path. so healing xattr is not possible"
+#define DHT_MSG_REVALIDATE_CBK_INFO_STR "Revalidate: subvolume returned -1"
+#define DHT_MSG_LAYOUT_MISMATCH_STR "Mismatching layouts"
+#define DHT_MSG_UNLINK_LOOKUP_INFO_STR "lookup_unlink retuened"
+#define DHT_MSG_LINKTO_FILE_FAILED_STR \
+ "Could not unlink the linkto file as either fd is open and/or linkto " \
+ "xattr is set"
+#define DHT_MSG_LAYOUT_PRESET_FAILED_STR \
+ "Could not set pre-set layout for subvolume"
+#define DHT_MSG_FILE_ON_MULT_SUBVOL_STR \
+ "multiple subvolumes have file (preferably rename the file in the " \
+ "backend, and do a fresh lookup"
+#define DHT_MSG_STALE_LINKFILE_DELETE_STR \
+ "attempting deletion of stale linkfile"
+#define DHT_MSG_LINK_FILE_LOOKUP_INFO_STR "Lookup on following linkfile"
+#define DHT_MSG_NO_SUBVOL_FOR_LINKTO_STR "No link subvolume for linkto"
+#define DHT_MSG_SUBVOL_RETURNED_STR "Subvolume returned -1"
+#define DHT_MSG_UNKNOWN_LOCAL_XSEL_STR "Unknown local->xsel"
+#define DHT_MSG_DICT_GET_FAILED_STR "Failed to get"
+#define DHT_MSG_UUID_PARSE_ERROR_STR "Failed to parse uuid"
+#define DHT_MSG_GET_XATTR_ERR_STR "getxattr err for dir"
+#define DHT_MSG_ALLOC_OR_FILL_FAILED_STR "alloc or fill failed"
+#define DHT_MSG_UPGRADE_BRICKS_STR \
+ "At least one of the bricks does not support this operation. Please " \
+ "upgrade all bricks"
+#define DHT_MSG_GET_REAL_NAME_FAILED_STR "Failed to get real filename"
+#define DHT_MSG_LAYOUT_NULL_STR "Layout is NULL"
+#define DHT_MSG_COPY_UUID_FAILED_STR "Failed to copy node uuid key"
+#define DHT_MSG_MDS_DETER_FAILED_STR \
+ "Cannot determine MDS, fetching xattr randomly from a subvol"
+#define DHT_MSG_HASHED_SUBVOL_DOWN_STR \
+ "MDS is down for path, so fetching xattr randomly from subvol"
+#define DHT_MSG_CREATE_REBAL_FAILED_STR \
+ "failed to create a new rebalance synctask"
+#define DHT_MSG_FIX_LAYOUT_INFO_STR "fixing the layout"
+#define DHT_MSG_OPERATION_NOT_SUP_STR "wrong directory-spread-count value"
+#define DHT_MSG_LINK_LAYOUT_FAILED_STR "failed to link the layout in inode"
+#define DHT_MSG_NO_SUBVOL_IN_LAYOUT_STR "no subvolume in layout for path"
+#define DHT_MSG_INODE_LK_ERROR_STR "mknod lock failed for file"
+#define DHT_MSG_MEM_ALLOC_FAILED_STR "mem allocation failed"
+#define DHT_MSG_PARENT_LAYOUT_CHANGED_STR \
+ "extracting in-memory layout of parent failed"
+#define DHT_MSG_SET_IN_PARAMS_DICT_FAILED_STR \
+ "setting in params dictionary failed"
+#define DHT_MSG_LOC_COPY_FAILED_STR "loc_copy failed"
+#define DHT_MSG_LOC_FAILED_STR "parent loc build failed"
+#define DHT_MSG_PARENT_LOC_FAILED_STR "locking parent failed"
+#define DHT_MSG_CREATE_LOCK_FAILED_STR "Create lock failed"
+#define DHT_MSG_PREV_ATTEMPT_FAILED_STR \
+ "mkdir loop detected. parent layout didn't change even though previous " \
+ "attempt of mkdir failed because of in-memory layout not matching with " \
+ "that on disk."
+#define DHT_MSG_REFRESH_ATTEMPT_STR \
+ "mkdir parent layout changed. Attempting a refresh and then a retry"
+#define DHT_MSG_ACQUIRE_LOCK_FAILED_STR \
+ "Acquiring lock on parent to guard against layout-change failed"
+#define DHT_MSG_CREATE_STUB_FAILED_STR "creating stub failed"
+#define DHT_MSG_WIND_LOCK_REQ_FAILED_STR \
+ "cannot wind lock request to guard parent layout"
+#define DHT_MSG_REFRESH_FAILED_STR "refreshing parent layout failed."
+#define DHT_MSG_CACHED_SUBVOL_ERROR_STR "On cached subvol"
+#define DHT_MSG_NO_LINK_SUBVOL_STR "Linkfile does not have link subvolume"
+#define DHT_MSG_SET_KEY_FAILED_STR "failed to set key"
+#define DHT_MSG_CHILD_DOWN_STR "Received CHILD_DOWN. Exiting"
+#define DHT_MSG_LOG_FIXED_LAYOUT_STR "log layout fixed"
+#define DHT_MSG_REBAL_STRUCT_SET_STR "local->rebalance already set"
+#define DHT_MSG_REMOVE_LINKTO_FAILED_STR "Removal of linkto failed at subvol"
+#define DHT_MSG_LAYOUT_DICT_SET_FAILED_STR "dht layout dict set failed"
+#define DHT_MSG_SUBVOL_INFO_STR "creating subvolume"
+#define DHT_MSG_COMPUTE_HASH_FAILED_STR "hash computation failed"
+#define DHT_MSG_INVALID_DISK_LAYOUT_STR \
+ "Invalid disk layout: Catastrophic error layout with unknown type found"
+#define DHT_MSG_LAYOUT_SORT_FAILED_STR "layout sort failed"
+#define DHT_MSG_ANOMALIES_INFO_STR "Found anomalies"
+#define DHT_MSG_XATTR_DICT_NULL_STR "xattr dictionary is NULL"
+#define DHT_MSG_DISK_LAYOUT_MISSING_STR "Disk layout missing"
+#define DHT_MSG_LAYOUT_INFO_STR "layout info"
+#define DHT_MSG_SUBVOL_NO_LAYOUT_INFO_STR "no pre-set layout for subvol"
+#define DHT_MSG_SELFHEAL_XATTR_FAILED_STR "layout setxattr failed"
+#define DHT_MSG_DIR_SELFHEAL_XATTR_FAILED_STR "Directory self heal xattr failed"
+#define DHT_MSG_DUMMY_ALLOC_FAILED_STR "failed to allocate dummy layout"
+#define DHT_MSG_DICT_IS_NULL_STR \
+ "dict is NULL, need to make sure gfids are same"
+#define DHT_MSG_ENTRYLK_ERROR_STR "acquiring entrylk after inodelk failed"
+#define DHT_MSG_NO_DISK_USAGE_STATUS_STR "no du stats"
+#define DHT_MSG_LINK_INODE_FAILED_STR "linking inode failed"
+#define DHT_MSG_SELFHEAL_FAILED_STR "Directory selfheal failed"
+#define DHT_MSG_NO_MDS_SUBVOL_STR "No mds subvol"
+#define DHT_MSG_LIST_XATTRS_FAILED_STR "failed to list xattrs"
+#define DHT_MSG_RESET_INTER_XATTR_FAILED_STR "Failed to reset internal xattr"
+#define DHT_MSG_MDS_DOWN_UNABLE_TO_SET_STR \
+ "mds subvol is down, unable to set xattr"
+#define DHT_MSG_DIR_ATTR_HEAL_FAILED_STR \
+ "Directory attr heal failed. Failed to set uid/gid"
+#define DHT_MSG_WIND_UNLOCK_FAILED_STR \
+ "Winding unlock failed: stale locks left on brick"
+#define DHT_MSG_COMMIT_HASH_FAILED_STR "Directory commit hash updaten failed"
+#define DHT_MSG_LK_ARRAY_INFO_STR "lk info"
+#define DHT_MSG_UNLOCK_GFID_FAILED_STR \
+ "unlock failed on gfid: stale lock might be left"
+#define DHT_MSG_UNLOCKING_FAILED_STR "unlocking failed"
+#define DHT_MSG_UNLOCK_FOLLOW_ENTRYLK_STR "not unlocking following entrylks"
+#define DHT_MSG_COPY_FRAME_FAILED_STR "copy frame failed"
+#define DHT_MSG_UNLOCK_FOLLOW_LOCKS_STR "not unlocking following locks"
+#define DHT_MSG_INODELK_FAILED_STR "inodelk failed on subvol"
+#define DHT_MSG_LOCK_FRAME_FAILED_STR "memory allocation failed for lock_frame"
+#define DHT_MSG_LOCAL_LOCK_INIT_FAILED_STR "dht_local_lock_init failed"
+#define DHT_MSG_ENTRYLK_FAILED_AFT_INODELK_STR \
+ "dht_blocking_entrylk failed after taking inodelk"
+#define DHT_MSG_BLOCK_INODELK_FAILED_STR "dht_blocking_inodelk failed"
+#define DHT_MSG_CALLOC_FAILED_STR "calloc failed"
+#define DHT_MSG_LOCK_ALLOC_FAILED_STR "lock allocation failed"
+#define DHT_MSG_ALLOC_FRAME_FAILED_NOT_UNLOCKING_FOLLOWING_ENTRYLKS_STR \
+ "cannot allocate a frame, not unlocking following entrylks"
+#define DHT_MSG_LOCAL_LOCKS_STORE_FAILED_UNLOCKING_FOLLOWING_ENTRYLK_STR \
+ "storing locks in local failed, not unlocking following entrylks"
+#define DHT_MSG_DST_NULL_SET_FAILED_STR \
+ "src or dst is NULL, Failed to set dictionary value"
#endif /* _DHT_MESSAGES_H_ */
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index 206628208f5..8ba8082bd86 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -8,92 +8,143 @@
cases as published by the Free Software Foundation.
*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "dht-common.h"
-#include "xlator.h"
-#include <signal.h>
+#include <glusterfs/syscall.h>
#include <fnmatch.h>
#include <signal.h>
+#include <glusterfs/events.h>
+#include "glusterfs/compat-errno.h" // for ENODATA on BSD
+
+#define GF_DISK_SECTOR_SIZE 512
+#define DHT_REBALANCE_PID 4242 /* Change it if required */
+#define DHT_REBALANCE_BLKSIZE 1048576 /* 1 MB */
+#define MAX_MIGRATE_QUEUE_COUNT 500
+#define MIN_MIGRATE_QUEUE_COUNT 200
+#define MAX_REBAL_TYPE_SIZE 16
+#define FILE_CNT_INTERVAL 600 /* 10 mins */
+#define ESTIMATE_START_INTERVAL 600 /* 10 mins */
+#define HARDLINK_MIG_INPROGRESS -2
+#define SKIP_MIGRATION_FD_POSITIVE -3
+#ifndef MAX
+#define MAX(a, b) (((a) > (b)) ? (a) : (b))
+#endif
-#define GF_DISK_SECTOR_SIZE 512
-#define DHT_REBALANCE_PID 4242 /* Change it if required */
-#define DHT_REBALANCE_BLKSIZE (128 * 1024)
+#define GF_CRAWL_INDEX_MOVE(idx, sv_cnt) \
+ { \
+ idx++; \
+ idx %= sv_cnt; \
+ }
-static int
-dht_write_with_holes (xlator_t *to, fd_t *fd, struct iovec *vec, int count,
- int32_t size, off_t offset, struct iobref *iobref)
+uint64_t g_totalfiles = 0;
+uint64_t g_totalsize = 0;
+
+void
+gf_defrag_free_dir_dfmeta(struct dir_dfmeta *meta, int local_subvols_cnt)
{
- int i = 0;
- int ret = -1;
- int start_idx = 0;
- int tmp_offset = 0;
- int write_needed = 0;
- int buf_len = 0;
- int size_pending = 0;
- char *buf = NULL;
-
- /* loop through each vector */
- for (i = 0; i < count; i++) {
- buf = vec[i].iov_base;
- buf_len = vec[i].iov_len;
-
- for (start_idx = 0; (start_idx + GF_DISK_SECTOR_SIZE) <= buf_len;
- start_idx += GF_DISK_SECTOR_SIZE) {
-
- if (mem_0filled (buf + start_idx, GF_DISK_SECTOR_SIZE) != 0) {
- write_needed = 1;
- continue;
- }
+ int i = 0;
+
+ if (meta) {
+ for (i = 0; i < local_subvols_cnt; i++) {
+ if (meta->equeue)
+ gf_dirent_free(&meta->equeue[i]);
+ if (meta->lfd && meta->lfd[i])
+ fd_unref(meta->lfd[i]);
+ }
- if (write_needed) {
- ret = syncop_write (to, fd, (buf + tmp_offset),
- (start_idx - tmp_offset),
- (offset + tmp_offset),
- iobref, 0, NULL, NULL);
- /* 'path' will be logged in calling function */
- if (ret < 0) {
- gf_log (THIS->name, GF_LOG_WARNING,
- "failed to write (%s)",
- strerror (-ret));
- ret = -1;
- goto out;
- }
-
- write_needed = 0;
- }
- tmp_offset = start_idx + GF_DISK_SECTOR_SIZE;
- }
+ GF_FREE(meta->equeue);
+ GF_FREE(meta->head);
+ GF_FREE(meta->iterator);
+ GF_FREE(meta->offset_var);
+ GF_FREE(meta->fetch_entries);
+ GF_FREE(meta->lfd);
+ GF_FREE(meta);
+ }
+}
- if ((start_idx < buf_len) || write_needed) {
- /* This means, last chunk is not yet written.. write it */
- ret = syncop_write (to, fd, (buf + tmp_offset),
- (buf_len - tmp_offset),
- (offset + tmp_offset), iobref, 0,
- NULL, NULL);
- if (ret < 0) {
- /* 'path' will be logged in calling function */
- gf_log (THIS->name, GF_LOG_WARNING,
- "failed to write (%s)",
- strerror (-ret));
- ret = -1;
- goto out;
- }
- }
+void
+gf_defrag_free_container(struct dht_container *container)
+{
+ if (container) {
+ gf_dirent_entry_free(container->df_entry);
- size_pending = (size - buf_len);
- if (!size_pending)
- break;
+ if (container->parent_loc) {
+ loc_wipe(container->parent_loc);
}
- ret = size;
-out:
- return ret;
+ GF_FREE(container->parent_loc);
+
+ GF_FREE(container);
+ }
+}
+void
+dht_set_global_defrag_error(gf_defrag_info_t *defrag, int ret)
+{
+ LOCK(&defrag->lock);
+ {
+ defrag->global_error = ret;
+ }
+ UNLOCK(&defrag->lock);
+ return;
+}
+
+static int
+dht_send_rebalance_event(xlator_t *this, int cmd, gf_defrag_status_t status)
+{
+ int ret = -1;
+ char *volname = NULL;
+ char *tmpstr = NULL;
+ char *ptr = NULL;
+ char *suffix = "-dht";
+ int len = 0;
+
+ eventtypes_t event = EVENT_LAST;
+
+ switch (status) {
+ case GF_DEFRAG_STATUS_COMPLETE:
+ event = EVENT_VOLUME_REBALANCE_COMPLETE;
+ break;
+ case GF_DEFRAG_STATUS_FAILED:
+ event = EVENT_VOLUME_REBALANCE_FAILED;
+ break;
+ case GF_DEFRAG_STATUS_STOPPED:
+ event = EVENT_VOLUME_REBALANCE_STOP;
+ break;
+ default:
+ break;
+ }
+
+ /* DHT volume */
+ len = strlen(this->name) - strlen(suffix);
+ tmpstr = gf_strdup(this->name);
+ if (tmpstr) {
+ ptr = tmpstr + len;
+ if (!strcmp(ptr, suffix)) {
+ tmpstr[len] = '\0';
+ volname = tmpstr;
+ }
+ }
+
+ if (!volname) {
+ /* Better than nothing */
+ volname = this->name;
+ }
+
+ if (event != EVENT_LAST) {
+ gf_event(event, "volume=%s", volname);
+ }
+
+ GF_FREE(tmpstr);
+ return ret;
+}
+
+static void
+dht_strip_out_acls(dict_t *dict)
+{
+ if (dict) {
+ dict_del(dict, "trusted.SGI_ACL_FILE");
+ dict_del(dict, POSIX_ACL_ACCESS_XATTR);
+ }
}
/*
@@ -132,150 +183,316 @@ be converted to "0" in dht_migrate_file.
*/
int32_t
-gf_defrag_handle_hardlink (xlator_t *this, loc_t *loc, dict_t *xattrs,
- struct iatt *stbuf)
+gf_defrag_handle_hardlink(xlator_t *this, loc_t *loc, int *fop_errno)
{
- int32_t ret = -1;
- xlator_t *cached_subvol = NULL;
- xlator_t *hashed_subvol = NULL;
- xlator_t *linkto_subvol = NULL;
- data_t *data = NULL;
- struct iatt iatt = {0,};
- int32_t op_errno = 0;
- dht_conf_t *conf = NULL;
- gf_loglevel_t loglevel = 0;
- dict_t *link_xattr = NULL;
-
- GF_VALIDATE_OR_GOTO ("defrag", loc, out);
- GF_VALIDATE_OR_GOTO ("defrag", loc->name, out);
- GF_VALIDATE_OR_GOTO ("defrag", stbuf, out);
- GF_VALIDATE_OR_GOTO ("defrag", this, out);
- GF_VALIDATE_OR_GOTO ("defrag", xattrs, out);
- GF_VALIDATE_OR_GOTO ("defrag", this->private, out);
-
- conf = this->private;
-
- if (gf_uuid_is_null (loc->pargfid)) {
- gf_msg ("", GF_LOG_ERROR, 0,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "Migrate file failed :"
- "loc->pargfid is NULL for %s", loc->path);
- goto out;
+ int32_t ret = -1;
+ xlator_t *cached_subvol = NULL;
+ xlator_t *hashed_subvol = NULL;
+ xlator_t *linkto_subvol = NULL;
+ data_t *data = NULL;
+ struct iatt iatt = {
+ 0,
+ };
+ int32_t op_errno = 0;
+ dht_conf_t *conf = NULL;
+ gf_loglevel_t loglevel = 0;
+ dict_t *link_xattr = NULL;
+ dict_t *dict = NULL;
+ dict_t *xattr_rsp = NULL;
+ struct iatt stbuf = {
+ 0,
+ };
+
+ *fop_errno = EINVAL;
+
+ GF_VALIDATE_OR_GOTO("defrag", loc, out);
+ GF_VALIDATE_OR_GOTO("defrag", loc->name, out);
+ GF_VALIDATE_OR_GOTO("defrag", this, out);
+ GF_VALIDATE_OR_GOTO("defrag", this->private, out);
+
+ conf = this->private;
+
+ if (gf_uuid_is_null(loc->pargfid)) {
+ gf_msg("", GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed :"
+ "loc->pargfid is NULL for %s",
+ loc->path);
+ *fop_errno = EINVAL;
+ ret = -1;
+ goto out;
+ }
+
+ if (gf_uuid_is_null(loc->gfid)) {
+ gf_msg("", GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed :"
+ "loc->gfid is NULL for %s",
+ loc->path);
+ *fop_errno = EINVAL;
+ ret = -1;
+ goto out;
+ }
+
+ link_xattr = dict_new();
+ if (!link_xattr) {
+ ret = -1;
+ *fop_errno = ENOMEM;
+ goto out;
+ }
+
+ /*
+ Parallel migration can lead to migration of the hard link multiple
+ times which can lead to data loss. Hence, adding a fresh lookup to
+ decide whether migration is required or not.
+
+ Elaborating the scenario for let say 10 hardlinks [link{1..10}]:
+ Let say the first hard link "link1" does the setxattr of the
+ new hashed subvolume info on the cached file. As there are multiple
+ threads working, we might have already all the links created on the
+ new hashed by the time we reach hardlink let say link5. Now the
+ number of links on hashed is equal to that of cached. Hence, file
+ migration will happen for link6.
+
+ Cached Hashed
+ --------T link6 rwxrwxrwx link6
+
+ Now post above state all the link file on the cached will be zero
+ byte linkto files. Hence, if we still do migration for the following
+ files link{7..10}, we will end up migrating 0 data leading to data
+ loss.
+ Hence, a lookup can make sure whether we need to migrate the
+ file or not.
+ */
+
+ dict = dict_new();
+ if (!dict) {
+ ret = -1;
+ *fop_errno = ENOMEM;
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
+ "could not allocate memory for dict");
+ goto out;
+ }
+
+ ret = dict_set_int32(dict, conf->link_xattr_name, 256);
+ if (ret) {
+ *fop_errno = ENOMEM;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:"
+ "%s: failed to set 'linkto' key in dict",
+ loc->path);
+ goto out;
+ }
+
+ ret = syncop_lookup(this, loc, &stbuf, NULL, dict, &xattr_rsp);
+ if (ret) {
+ /*Ignore ENOENT and ESTALE as file might have been
+ migrated already*/
+ if (-ret == ENOENT || -ret == ESTALE) {
+ ret = -2;
+ goto out;
+ }
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:%s lookup failed with ret = %d", loc->path,
+ ret);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+
+ cached_subvol = dht_subvol_get_cached(this, loc->inode);
+ if (!cached_subvol) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed :"
+ "Failed to get cached subvol"
+ " for %s on %s",
+ loc->name, this->name);
+ *fop_errno = EINVAL;
+ ret = -1;
+ goto out;
+ }
+
+ hashed_subvol = dht_subvol_get_hashed(this, loc);
+ if (!hashed_subvol) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed :"
+ "Failed to get hashed subvol"
+ " for %s on %s",
+ loc->name, this->name);
+ *fop_errno = EINVAL;
+ ret = -1;
+ goto out;
+ }
+
+ /* Hardlink migration happens only with remove-brick. So this condition will
+ * be true only when the migration has happened. In case hardlinks are
+ * migrated for rebalance case, remove this check. Having this check here
+ * avoid redundant calls below*/
+ if (hashed_subvol == cached_subvol) {
+ ret = -2;
+ goto out;
+ }
+
+ gf_log(this->name, GF_LOG_INFO,
+ "Attempting to migrate hardlink %s "
+ "with gfid %s from %s -> %s",
+ loc->name, uuid_utoa(loc->gfid), cached_subvol->name,
+ hashed_subvol->name);
+
+ data = dict_get(xattr_rsp, conf->link_xattr_name);
+ /* set linkto on cached -> hashed if not present, else link it */
+ if (!data) {
+ ret = dict_set_str(link_xattr, conf->link_xattr_name,
+ hashed_subvol->name);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed :"
+ "Failed to set dictionary value:"
+ " key = %s for %s",
+ conf->link_xattr_name, loc->name);
+ *fop_errno = ENOMEM;
+ ret = -1;
+ goto out;
}
- if (gf_uuid_is_null (loc->gfid)) {
- gf_msg ("", GF_LOG_ERROR, 0,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "Migrate file failed :"
- "loc->gfid is NULL for %s", loc->path);
- goto out;
+ ret = syncop_setxattr(cached_subvol, loc, link_xattr, 0, NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed :"
+ "Linkto setxattr failed %s -> %s",
+ cached_subvol->name, loc->name);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
}
- link_xattr = dict_new ();
- if (!link_xattr) {
- ret = -1;
- errno = ENOMEM;
- goto out;
+ gf_msg_debug(this->name, 0,
+ "hardlink target subvol created on %s "
+ ",cached %s, file %s",
+ hashed_subvol->name, cached_subvol->name, loc->path);
+
+ ret = -2;
+ goto out;
+ } else {
+ linkto_subvol = dht_linkfile_subvol(this, NULL, NULL, xattr_rsp);
+ if (!linkto_subvol) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SUBVOL_ERROR,
+ "Failed to get "
+ "linkto subvol for %s",
+ loc->name);
+ } else {
+ hashed_subvol = linkto_subvol;
}
- cached_subvol = dht_subvol_get_cached (this, loc->inode);
- if (!cached_subvol) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "Migrate file failed :"
- "Failed to get cached subvol"
- " for %s on %s", loc->name, this->name);
+ ret = syncop_link(hashed_subvol, loc, loc, &iatt, NULL, NULL);
+ if (ret) {
+ op_errno = -ret;
+ ret = -1;
+
+ loglevel = (op_errno == EEXIST) ? GF_LOG_DEBUG : GF_LOG_ERROR;
+ gf_msg(this->name, loglevel, op_errno,
+ DHT_MSG_MIGRATE_HARDLINK_FILE_FAILED,
+ "link of %s -> %s"
+ " failed on subvol %s",
+ loc->name, uuid_utoa(loc->gfid), hashed_subvol->name);
+ if (op_errno != EEXIST) {
+ *fop_errno = op_errno;
goto out;
+ }
+ } else {
+ gf_msg_debug(this->name, 0,
+ "syncop_link successful for"
+ " hardlink %s on subvol %s, cached %s",
+ loc->path, hashed_subvol->name, cached_subvol->name);
}
+ }
- hashed_subvol = dht_subvol_get_hashed (this, loc);
- if (!hashed_subvol) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "Migrate file failed :"
- "Failed to get hashed subvol"
- " for %s on %s", loc->name, this->name);
- goto out;
+ ret = syncop_lookup(hashed_subvol, loc, &iatt, NULL, NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed :Failed lookup %s on %s ", loc->name,
+ hashed_subvol->name);
+
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+
+ /* There is a race where on the target subvol for the hardlink
+ * (note: hash subvol for the hardlink might differ from this), some
+ * other client(non-rebalance) would have created a linkto file for that
+ * hardlink as part of lookup. So let say there are 10 hardlinks, on the
+ * 5th hardlink it self the hardlinks might have migrated. Now for
+ * (6..10th) hardlinks the cached and target would be same as the file
+ * has already migrated. Hence this check is needed */
+ if (cached_subvol == hashed_subvol) {
+ gf_msg_debug(this->name, 0,
+ "source %s and destination %s "
+ "for hardlink %s are same",
+ cached_subvol->name, hashed_subvol->name, loc->path);
+ ret = -2;
+ goto out;
+ }
+
+ if (iatt.ia_nlink == stbuf.ia_nlink) {
+ ret = dht_migrate_file(this, loc, cached_subvol, hashed_subvol,
+ GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS, fop_errno);
+ if (ret) {
+ goto out;
}
+ }
+ ret = -2;
+out:
+ if (link_xattr)
+ dict_unref(link_xattr);
- gf_log (this->name, GF_LOG_INFO, "Attempting to migrate hardlink %s "
- "with gfid %s from %s -> %s", loc->name, uuid_utoa (loc->gfid),
- cached_subvol->name, hashed_subvol->name);
- data = dict_get (xattrs, conf->link_xattr_name);
- /* set linkto on cached -> hashed if not present, else link it */
- if (!data) {
- ret = dict_set_str (link_xattr, conf->link_xattr_name,
- hashed_subvol->name);
- if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "Migrate file failed :"
- "Failed to set dictionary value:"
- " key = %s for %s",
- conf->link_xattr_name, loc->name);
- goto out;
- }
+ if (xattr_rsp)
+ dict_unref(xattr_rsp);
- ret = syncop_setxattr (cached_subvol, loc, link_xattr, 0, NULL,
- NULL);
- if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "Migrate file failed :"
- "Linkto setxattr failed %s -> %s (%s)",
- cached_subvol->name,
- loc->name, strerror (-ret));
- ret = -1;
- goto out;
- }
- ret = -2;
- goto out;
- } else {
- linkto_subvol = dht_linkfile_subvol (this, NULL, NULL, xattrs);
- if (!linkto_subvol) {
- gf_log (this->name, GF_LOG_ERROR, "Failed to get "
- "linkto subvol for %s", loc->name);
- } else {
- hashed_subvol = linkto_subvol;
- }
+ if (dict)
+ dict_unref(dict);
- ret = syncop_link (hashed_subvol, loc, loc, NULL, NULL);
- if (ret) {
- op_errno = -ret;
- ret = -1;
+ return ret;
+}
- loglevel = (op_errno == EEXIST) ? GF_LOG_DEBUG : \
- GF_LOG_ERROR;
- gf_log (this->name, loglevel, "link of %s -> %s"
- " failed on subvol %s (%s)", loc->name,
- uuid_utoa(loc->gfid),
- hashed_subvol->name, strerror (op_errno));
- if (op_errno != EEXIST)
- goto out;
- }
- }
- ret = syncop_lookup (hashed_subvol, loc, &iatt, NULL, NULL, NULL);
- if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, -ret,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "Migrate file failed :Failed lookup %s on %s ",
- loc->name, hashed_subvol->name);
+static int
+__check_file_has_hardlink(xlator_t *this, loc_t *loc, struct iatt *stbuf,
+ dict_t *xattrs, int flags, gf_defrag_info_t *defrag,
+ dht_conf_t *conf, int *fop_errno)
+{
+ int ret = 0;
- ret = -1;
- goto out;
+ if (flags == GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS) {
+ ret = 0;
+ return ret;
+ }
+ if (stbuf->ia_nlink > 1) {
+ /* support for decomission */
+ if (flags == GF_DHT_MIGRATE_HARDLINK) {
+ synclock_lock(&conf->link_lock);
+ ret = gf_defrag_handle_hardlink(this, loc, fop_errno);
+ synclock_unlock(&conf->link_lock);
+ /*
+ Returning zero will force the file to be remigrated.
+ Checkout gf_defrag_handle_hardlink for more information.
+ */
+ if (ret && ret != -2) {
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:"
+ "%s: failed to migrate file with link",
+ loc->path);
+ }
+ } else {
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migration skipped for:"
+ "%s: file has hardlinks",
+ loc->path);
+ *fop_errno = ENOTSUP;
+ ret = 1;
}
+ }
- if (iatt.ia_nlink == stbuf->ia_nlink) {
- ret = dht_migrate_file (this, loc, cached_subvol, hashed_subvol,
- GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS);
- if (ret)
- goto out;
- }
- ret = -2;
-out:
- if (link_xattr)
- dict_unref (link_xattr);
- return ret;
+ return ret;
}
/*
@@ -286,594 +503,962 @@ out:
gf_defrag_handle_hardlink for description of "returning -2")
-1 : failure
*/
-static inline int
-__is_file_migratable (xlator_t *this, loc_t *loc,
- struct iatt *stbuf, dict_t *xattrs, int flags)
+static int
+__is_file_migratable(xlator_t *this, loc_t *loc, struct iatt *stbuf,
+ dict_t *xattrs, int flags, gf_defrag_info_t *defrag,
+ dht_conf_t *conf, int *fop_errno)
{
- int ret = -1;
+ int ret = -1;
+ int lock_count = 0;
+
+ if (IA_ISDIR(stbuf->ia_type)) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:"
+ "%s: migrate-file called on directory",
+ loc->path);
+ *fop_errno = EISDIR;
+ ret = -1;
+ goto out;
+ }
- if (IA_ISDIR (stbuf->ia_type)) {
- gf_msg (this->name, GF_LOG_WARNING, 0,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "Migrate file failed:"
- "%s: migrate-file called on directory", loc->path);
- ret = -1;
- goto out;
+ if (!conf->lock_migration_enabled) {
+ ret = dict_get_int32(xattrs, GLUSTERFS_POSIXLK_COUNT, &lock_count);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:"
+ "%s: Unable to get lock count for file",
+ loc->path);
+ *fop_errno = EINVAL;
+ ret = -1;
+ goto out;
}
- if (flags == GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS) {
- ret = 0;
- goto out;
- }
- if (stbuf->ia_nlink > 1) {
- /* support for decomission */
- if (flags == GF_DHT_MIGRATE_HARDLINK) {
- ret = gf_defrag_handle_hardlink (this, loc,
- xattrs, stbuf);
-
- /*
- Returning zero will force the file to be remigrated.
- Checkout gf_defrag_handle_hardlink for more information.
- */
- if (ret && ret != -2) {
- gf_msg (this->name, GF_LOG_WARNING, 0,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "Migrate file failed:"
- "%s: failed to migrate file with link",
- loc->path);
- }
- } else {
- gf_msg (this->name, GF_LOG_WARNING, 0,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "Migrate file failed:"
- "%s: file has hardlinks", loc->path);
- ret = -ENOTSUP;
- }
- goto out;
+ if (lock_count) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed: %s: File has locks."
+ " Skipping file migration",
+ loc->path);
+ *fop_errno = ENOTSUP;
+ ret = 1;
+ goto out;
}
+ }
- ret = 0;
-
+ /* Check if file has hardlink*/
+ ret = __check_file_has_hardlink(this, loc, stbuf, xattrs, flags, defrag,
+ conf, fop_errno);
out:
- return ret;
+ return ret;
}
-static inline int
-__dht_rebalance_create_dst_file (xlator_t *to, xlator_t *from, loc_t *loc, struct iatt *stbuf,
- dict_t *dict, fd_t **dst_fd, dict_t *xattr)
+static int
+__dht_rebalance_create_dst_file(xlator_t *this, xlator_t *to, xlator_t *from,
+ loc_t *loc, struct iatt *stbuf, fd_t **dst_fd,
+ int *fop_errno, int file_has_holes)
{
- xlator_t *this = NULL;
- int ret = -1;
- fd_t *fd = NULL;
- struct iatt new_stbuf = {0,};
- struct iatt check_stbuf= {0,};
- dht_conf_t *conf = NULL;
-
- this = THIS;
- conf = this->private;
-
- ret = dict_set_static_bin (dict, "gfid-req", stbuf->ia_gfid, 16);
- if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_DICT_SET_FAILED,
- "%s: failed to set dictionary value: key = gfid-req",
- loc->path);
- goto out;
- }
-
- ret = dict_set_str (dict, conf->link_xattr_name, from->name);
- if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_DICT_SET_FAILED,
- "%s: failed to set dictionary value: key = %s ",
- loc->path, conf->link_xattr_name);
- goto out;
+ int ret = -1;
+ int ret2 = -1;
+ fd_t *fd = NULL;
+ struct iatt new_stbuf = {
+ 0,
+ };
+ struct iatt check_stbuf = {
+ 0,
+ };
+ dht_conf_t *conf = NULL;
+ dict_t *dict = NULL;
+ dict_t *xdata = NULL;
+
+ conf = this->private;
+
+ dict = dict_new();
+ if (!dict) {
+ *fop_errno = ENOMEM;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
+ "dictionary allocation failed for"
+ "path:%s",
+ loc->path);
+ goto out;
+ }
+ ret = dict_set_gfuuid(dict, "gfid-req", stbuf->ia_gfid, true);
+ if (ret) {
+ *fop_errno = ENOMEM;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "%s: failed to set dictionary value: key = gfid-req", loc->path);
+ goto out;
+ }
+
+ ret = dict_set_str(dict, conf->link_xattr_name, from->name);
+ if (ret) {
+ *fop_errno = ENOMEM;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "%s: failed to set dictionary value: key = %s ", loc->path,
+ conf->link_xattr_name);
+ goto out;
+ }
+
+ fd = fd_create(loc->inode, DHT_REBALANCE_PID);
+ if (!fd) {
+ *fop_errno = ENOMEM;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: fd create failed (destination)", loc->path);
+ goto out;
+ }
+
+ xdata = dict_new();
+ if (!xdata) {
+ *fop_errno = ENOMEM;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: dict_new failed)", loc->path);
+ goto out;
+ }
+
+ ret = dict_set_int32_sizen(xdata, GF_CLEAN_WRITE_PROTECTION, 1);
+ if (ret) {
+ *fop_errno = ENOMEM;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "%s: failed to set dictionary value: key = %s ", loc->path,
+ GF_CLEAN_WRITE_PROTECTION);
+ goto out;
+ }
+
+ ret = syncop_lookup(to, loc, &new_stbuf, NULL, xdata, NULL);
+ if (!ret) {
+ /* File exits in the destination, check if gfid matches */
+ if (gf_uuid_compare(stbuf->ia_gfid, new_stbuf.ia_gfid) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_GFID_MISMATCH,
+ "file %s exists in %s with different gfid", loc->path,
+ to->name);
+ *fop_errno = EINVAL;
+ ret = -1;
+ goto out;
}
+ }
+ if ((ret < 0) && (-ret != ENOENT)) {
+ /* File exists in destination, but not accessible */
+ gf_msg(THIS->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: failed to lookup file", loc->path);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
+ }
- fd = fd_create (loc->inode, DHT_REBALANCE_PID);
- if (!fd) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "%s: fd create failed (destination) (%s)",
- loc->path, strerror (errno));
- ret = -1;
- goto out;
+ /* Create the destination with LINKFILE mode, and linkto xattr,
+ if the linkfile already exists, just open the file */
+ if (!ret) {
+ /*
+ * File already present, just open the file.
+ */
+ ret = syncop_open(to, loc, O_RDWR, fd, NULL, NULL);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "failed to open %s on %s", loc->path, to->name);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
}
-
- ret = syncop_lookup (to, loc, &new_stbuf, NULL, NULL, NULL);
- if (!ret) {
- /* File exits in the destination, check if gfid matches */
- if (gf_uuid_compare (stbuf->ia_gfid, new_stbuf.ia_gfid) != 0) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_GFID_MISMATCH,
- "file %s exists in %s with different gfid",
- loc->path, to->name);
- fd_unref (fd);
- goto out;
- }
+ } else {
+ ret = syncop_create(to, loc, O_RDWR, DHT_LINKFILE_MODE, fd, &new_stbuf,
+ dict, NULL);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "failed to create %s on %s", loc->path, to->name);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
}
- if ((ret < 0) && (-ret != ENOENT)) {
- /* File exists in destination, but not accessible */
- gf_msg (THIS->name, GF_LOG_WARNING, 0,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "%s: failed to lookup file (%s)",
- loc->path, strerror (-ret));
- ret = -1;
- goto out;
+ }
+
+ fd_bind(fd);
+
+ /*Reason of doing lookup after create again:
+ *In the create, there is some time-gap between opening fd at the
+ *server (posix_layer) and binding it in server (incrementing fd count),
+ *so if in that time-gap, if other process sends unlink considering it
+ *as a linkto file, because inode->fd count will be 0, so file will be
+ *unlinked at the backend. And because further operations are performed
+ *on fd, so though migration will be done but will end with no file
+ *at the backend.
+ */
+
+ ret = syncop_lookup(to, loc, &check_stbuf, NULL, NULL, NULL);
+ if (!ret) {
+ if (gf_uuid_compare(stbuf->ia_gfid, check_stbuf.ia_gfid) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_GFID_MISMATCH,
+ "file %s exists in %s with different gfid,"
+ "found in lookup after create",
+ loc->path, to->name);
+ *fop_errno = EINVAL;
+ ret = -1;
+ goto out;
}
-
- /* Create the destination with LINKFILE mode, and linkto xattr,
- if the linkfile already exists, it will just open the file */
- ret = syncop_create (to, loc, O_RDWR, DHT_LINKFILE_MODE, fd,
- &new_stbuf, dict, NULL);
- if (ret < 0) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "failed to create %s on %s (%s)",
- loc->path, to->name, strerror (-ret));
- ret = -1;
- goto out;
+ }
+
+ if (-ret == ENOENT) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: file does not exist"
+ "on %s",
+ loc->path, to->name);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+
+ ret = syncop_fsetattr(to, fd, stbuf, (GF_SET_ATTR_UID | GF_SET_ATTR_GID),
+ NULL, NULL, NULL, NULL);
+ if (ret < 0) {
+ *fop_errno = -ret;
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "chown failed for %s on %s", loc->path, to->name);
+ }
+
+ /* No need to bother about 0 byte size files */
+ if (stbuf->ia_size > 0) {
+ if (conf->use_fallocate && !file_has_holes) {
+ ret = syncop_fallocate(to, fd, 0, 0, stbuf->ia_size, NULL, NULL);
+ if (ret < 0) {
+ if (ret == -EOPNOTSUPP || ret == -EINVAL || ret == -ENOSYS) {
+ conf->use_fallocate = _gf_false;
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "fallocate failed for %s on %s", loc->path,
+ to->name);
+
+ *fop_errno = -ret;
+
+ /* fallocate does not release the space
+ * in some cases
+ */
+ ret2 = syncop_ftruncate(to, fd, 0, NULL, NULL, NULL, NULL);
+ if (ret2 < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret2,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "ftruncate failed for "
+ "%s on %s",
+ loc->path, to->name);
+ }
+ goto out;
+ }
+ }
+ } else {
+ ret = syncop_ftruncate(to, fd, stbuf->ia_size, NULL, NULL, NULL,
+ NULL);
+ if (ret < 0) {
+ *fop_errno = -ret;
+ gf_msg(this->name, GF_LOG_WARNING, -ret,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "ftruncate failed for %s on %s", loc->path, to->name);
+ }
}
+ }
+ /* success */
+ ret = 0;
- /*Reason of doing lookup after create again:
- *In the create, there is some time-gap between opening fd at the
- *server (posix_layer) and binding it in server (incrementing fd count),
- *so if in that time-gap, if other process sends unlink considering it
- *as a linkto file, because inode->fd count will be 0, so file will be
- *unlinked at the backend. And because furthur operations are performed
- *on fd, so though migration will be done but will end with no file
- *at the backend.
- */
+ if (dst_fd)
+ *dst_fd = fd;
-
- ret = syncop_lookup (to, loc, &check_stbuf, NULL, NULL, NULL);
- if (!ret) {
-
- if (gf_uuid_compare (stbuf->ia_gfid, check_stbuf.ia_gfid) != 0) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_GFID_MISMATCH,
- "file %s exists in %s with different gfid,"
- "found in lookup after create",
- loc->path, to->name);
- ret = -1;
- fd_unref (fd);
- goto out;
- }
-
- }
-
- if (-ret == ENOENT) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_MIGRATE_FILE_FAILED, "%s: file does not exists"
- "on %s (%s)", loc->path, to->name, strerror (-ret));
- ret = -1;
- fd_unref (fd);
- goto out;
+out:
+ if (ret) {
+ if (fd) {
+ fd_unref(fd);
}
+ }
+ if (dict)
+ dict_unref(dict);
- ret = syncop_fsetxattr (to, fd, xattr, 0, NULL, NULL);
- if (ret < 0)
- gf_msg (this->name, GF_LOG_WARNING, 0,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "%s: failed to set xattr on %s (%s)",
- loc->path, to->name, strerror (-ret));
-
- ret = syncop_ftruncate (to, fd, stbuf->ia_size, NULL, NULL);
- if (ret < 0)
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "ftruncate failed for %s on %s (%s)",
- loc->path, to->name, strerror (-ret));
-
- ret = syncop_fsetattr (to, fd, stbuf,
- (GF_SET_ATTR_UID | GF_SET_ATTR_GID),
- NULL, NULL, NULL, NULL);
- if (ret < 0)
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "chown failed for %s on %s (%s)",
- loc->path, to->name, strerror (-ret));
-
- if (dst_fd)
- *dst_fd = fd;
-
- /* success */
- ret = 0;
+ if (xdata)
+ dict_unref(xdata);
-out:
- return ret;
+ return ret;
}
-static inline int
-__dht_check_free_space (xlator_t *to, xlator_t *from, loc_t *loc,
- struct iatt *stbuf, int flag)
+static int
+__dht_check_free_space(xlator_t *this, xlator_t *to, xlator_t *from, loc_t *loc,
+ struct iatt *stbuf, int flag, dht_conf_t *conf,
+ gf_boolean_t *target_changed, xlator_t **new_subvol,
+ int *fop_errno)
{
- struct statvfs src_statfs = {0,};
- struct statvfs dst_statfs = {0,};
- int ret = -1;
- xlator_t *this = NULL;
- dict_t *xdata = NULL;
-
- uint64_t src_statfs_blocks = 1;
- uint64_t dst_statfs_blocks = 1;
-
- this = THIS;
-
- xdata = dict_new ();
- if (!xdata) {
- errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "failed to allocate dictionary");
- goto out;
+ struct statvfs src_statfs = {
+ 0,
+ };
+ struct statvfs dst_statfs = {
+ 0,
+ };
+ int ret = -1;
+ dict_t *xdata = NULL;
+ dht_layout_t *layout = NULL;
+ uint64_t src_statfs_blocks = 1;
+ uint64_t dst_statfs_blocks = 1;
+ double dst_post_availspacepercent = 0;
+ double src_post_availspacepercent = 0;
+ uint64_t file_blocks = 0;
+ uint64_t src_total_blocks = 0;
+ uint64_t dst_total_blocks = 0;
+
+ xdata = dict_new();
+ if (!xdata) {
+ *fop_errno = ENOMEM;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
+ "failed to allocate dictionary");
+ goto out;
+ }
+
+ ret = dict_set_int8(xdata, GF_INTERNAL_IGNORE_DEEM_STATFS, 1);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Failed to set " GF_INTERNAL_IGNORE_DEEM_STATFS " in dict");
+ ret = -1;
+ *fop_errno = ENOMEM;
+ goto out;
+ }
+
+ ret = syncop_statfs(from, loc, &src_statfs, xdata, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "failed to get statfs of %s on %s", loc->path, from->name);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+
+ ret = syncop_statfs(to, loc, &dst_statfs, xdata, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "failed to get statfs of %s on %s", loc->path, to->name);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+
+ gf_msg_debug(this->name, 0,
+ "min_free_disk - %f , block available - %" PRId64
+ ", block size - %lu",
+ conf->min_free_disk, dst_statfs.f_bavail, dst_statfs.f_bsize);
+
+ dst_statfs_blocks = dst_statfs.f_bavail *
+ (dst_statfs.f_frsize / GF_DISK_SECTOR_SIZE);
+
+ src_statfs_blocks = src_statfs.f_bavail *
+ (src_statfs.f_frsize / GF_DISK_SECTOR_SIZE);
+
+ dst_total_blocks = dst_statfs.f_blocks *
+ (dst_statfs.f_frsize / GF_DISK_SECTOR_SIZE);
+
+ src_total_blocks = src_statfs.f_blocks *
+ (src_statfs.f_frsize / GF_DISK_SECTOR_SIZE);
+
+ /* if force option is given, do not check for space @ dst.
+ * Check only if space is avail for the file */
+ if (flag != GF_DHT_MIGRATE_DATA)
+ goto check_avail_space;
+
+ /* Check:
+ During rebalance `migrate-data` - Destination subvol experiences
+ a `reduction` in 'blocks' of free space, at the same time source
+ subvol gains certain 'blocks' of free space. A valid check is
+ necessary here to avoid erroneous move to destination where
+ the space could be scantily available.
+ With heterogeneous brick support, an actual space comparison could
+ prevent any files being migrated to newly added bricks if they are
+ smaller then the free space available on the existing bricks.
+ */
+ if (!conf->use_fallocate) {
+ file_blocks = stbuf->ia_size + GF_DISK_SECTOR_SIZE - 1;
+ file_blocks /= GF_DISK_SECTOR_SIZE;
+
+ if (file_blocks >= dst_statfs_blocks) {
+ dst_statfs_blocks = 0;
+ } else {
+ dst_statfs_blocks -= file_blocks;
}
+ }
+
+ src_post_availspacepercent = ((src_statfs_blocks + file_blocks) * 100) /
+ src_total_blocks;
+
+ dst_post_availspacepercent = (dst_statfs_blocks * 100) / dst_total_blocks;
+
+ if (dst_post_availspacepercent < src_post_availspacepercent) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "data movement of file "
+ "{blocks:%" PRIu64
+ " name:(%s)} would result in "
+ "dst node (%s:%" PRIu64
+ ") having lower disk "
+ "space than the source node (%s:%" PRIu64
+ ")"
+ ".Skipping file.",
+ stbuf->ia_blocks, loc->path, to->name, dst_statfs_blocks,
+ from->name, src_statfs_blocks);
+
+ /* this is not a 'failure', but we don't want to
+ consider this as 'success' too :-/ */
+ *fop_errno = ENOSPC;
+ ret = 1;
+ goto out;
+ }
- ret = dict_set_int8 (xdata, GF_INTERNAL_IGNORE_DEEM_STATFS, 1);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "Failed to set "
- GF_INTERNAL_IGNORE_DEEM_STATFS" in dict");
- ret = -1;
- goto out;
+check_avail_space:
+ if (conf->disk_unit == 'p' && dst_statfs.f_blocks) {
+ dst_post_availspacepercent = (dst_statfs_blocks * 100) /
+ dst_total_blocks;
+
+ gf_msg_debug(this->name, 0,
+ "file : %s, post_availspacepercent"
+ " : %lf f_bavail : %" PRIu64 " min-free-disk: %lf",
+ loc->path, dst_post_availspacepercent, dst_statfs.f_bavail,
+ conf->min_free_disk);
+
+ if (dst_post_availspacepercent < conf->min_free_disk) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, 0,
+ "Write will cross min-free-disk for "
+ "file - %s on subvol - %s. Looking "
+ "for new subvol",
+ loc->path, to->name);
+
+ goto find_new_subvol;
+ } else {
+ ret = 0;
+ goto out;
}
+ }
- ret = syncop_statfs (from, loc, &src_statfs, xdata, NULL);
- if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "failed to get statfs of %s on %s (%s)",
- loc->path, from->name, strerror (-ret));
- ret = -1;
- goto out;
- }
+ if (conf->disk_unit != 'p') {
+ if ((dst_statfs_blocks * GF_DISK_SECTOR_SIZE) < conf->min_free_disk) {
+ gf_msg_debug(this->name, 0,
+ "file : %s, destination frsize: %lu "
+ "f_bavail : %" PRIu64 " min-free-disk: %lf",
+ loc->path, dst_statfs.f_frsize, dst_statfs.f_bavail,
+ conf->min_free_disk);
- ret = syncop_statfs (to, loc, &dst_statfs, xdata, NULL);
- if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "failed to get statfs of %s on %s (%s)",
- loc->path, to->name, strerror (-ret));
- ret = -1;
- goto out;
- }
+ gf_msg(this->name, GF_LOG_WARNING, 0, 0,
+ "write will"
+ " cross min-free-disk for file - %s on subvol -"
+ " %s. looking for new subvol",
+ loc->path, to->name);
- /* if force option is given, do not check for space @ dst.
- * Check only if space is avail for the file */
- if (flag != GF_DHT_MIGRATE_DATA)
- goto check_avail_space;
+ goto find_new_subvol;
- /* Check:
- During rebalance `migrate-data` - Destination subvol experiences
- a `reduction` in 'blocks' of free space, at the same time source
- subvol gains certain 'blocks' of free space. A valid check is
- necessary here to avoid errorneous move to destination where
- the space could be scantily available.
- */
- if (stbuf) {
- dst_statfs_blocks = ((dst_statfs.f_bavail *
- dst_statfs.f_bsize) /
- GF_DISK_SECTOR_SIZE);
- src_statfs_blocks = ((src_statfs.f_bavail *
- src_statfs.f_bsize) /
- GF_DISK_SECTOR_SIZE);
- if ((dst_statfs_blocks - stbuf->ia_blocks) <
- (src_statfs_blocks + stbuf->ia_blocks)) {
-
- gf_msg (this->name, GF_LOG_WARNING, 0,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "data movement attempted from node "
- "(%s:%"PRIu64") with higher disk space "
- "to a node (%s:%"PRIu64") with lesser "
- "disk space, file { blocks:%"PRIu64", "
- "name:(%s) }", from->name, src_statfs_blocks,
- to->name, dst_statfs_blocks,
- stbuf->ia_blocks, loc->path);
-
- /* this is not a 'failure', but we don't want to
- consider this as 'success' too :-/ */
- ret = -1;
- goto out;
- }
- }
-check_avail_space:
- if (((dst_statfs.f_bavail * dst_statfs.f_bsize) /
- GF_DISK_SECTOR_SIZE) < stbuf->ia_blocks) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "data movement attempted from node (%s) to node (%s) "
- "which does not have required free space for (%s)",
- from->name, to->name, loc->path);
- ret = -1;
- goto out;
+ } else {
+ ret = 0;
+ goto out;
}
+ }
+find_new_subvol:
+ layout = dht_layout_get(this, loc->parent);
+ if (!layout) {
+ gf_log(this->name, GF_LOG_ERROR, "Layout is NULL");
+ *fop_errno = EINVAL;
+ ret = -1;
+ goto out;
+ }
+
+ *new_subvol = dht_subvol_with_free_space_inodes(this, to, from, layout,
+ stbuf->ia_size);
+ if ((!(*new_subvol)) || (*new_subvol == from)) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_SUBVOL_INSUFF_SPACE,
+ "Could not find any subvol"
+ " with space accommodating the file - %s. Consider "
+ "adding bricks",
+ loc->path);
+
+ *target_changed = _gf_false;
+ *fop_errno = ENOSPC;
+ ret = -1;
+ } else {
+ gf_msg(this->name, GF_LOG_INFO, 0, 0,
+ "new target found - %s"
+ " for file - %s",
+ (*new_subvol)->name, loc->path);
+ *target_changed = _gf_true;
ret = 0;
+ }
+
out:
- if (xdata)
- dict_unref (xdata);
- return ret;
+ if (xdata)
+ dict_unref(xdata);
+ return ret;
}
-static inline int
-__dht_rebalance_migrate_data (xlator_t *from, xlator_t *to, fd_t *src, fd_t *dst,
- uint64_t ia_size, int hole_exists)
+static int
+__dht_rebalance_migrate_data(xlator_t *this, gf_defrag_info_t *defrag,
+ xlator_t *from, xlator_t *to, fd_t *src, fd_t *dst,
+ uint64_t ia_size, int hole_exists, int *fop_errno)
{
- int ret = 0;
- int count = 0;
- off_t offset = 0;
- struct iovec *vector = NULL;
- struct iobref *iobref = NULL;
- uint64_t total = 0;
- size_t read_size = 0;
-
- /* if file size is '0', no need to enter this loop */
- while (total < ia_size) {
- read_size = (((ia_size - total) > DHT_REBALANCE_BLKSIZE) ?
- DHT_REBALANCE_BLKSIZE : (ia_size - total));
- ret = syncop_readv (from, src, read_size,
- offset, 0, &vector, &count, &iobref, NULL,
- NULL);
- if (!ret || (ret < 0)) {
- break;
+ int ret = 0;
+ int count = 0;
+ off_t offset = 0;
+ off_t data_offset = 0;
+ off_t hole_offset = 0;
+ struct iovec *vector = NULL;
+ struct iobref *iobref = NULL;
+ uint64_t total = 0;
+ size_t read_size = 0;
+ size_t data_block_size = 0;
+ dict_t *xdata = NULL;
+ dht_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ /* if file size is '0', no need to enter this loop */
+ while (total < ia_size) {
+ /* This is a regular file - read it sequentially */
+ if (!hole_exists) {
+ read_size = (((ia_size - total) > DHT_REBALANCE_BLKSIZE)
+ ? DHT_REBALANCE_BLKSIZE
+ : (ia_size - total));
+ } else {
+ /* This is a sparse file - read only the data segments in the file
+ */
+
+ /* If the previous data block is fully copied, find the next data
+ * segment
+ * starting at the offset of the last read and written byte, */
+ if (data_block_size <= 0) {
+ ret = syncop_seek(from, src, offset, GF_SEEK_DATA, NULL,
+ &data_offset);
+ if (ret) {
+ if (ret == -ENXIO)
+ ret = 0; /* No more data segments */
+ else
+ *fop_errno = -ret; /* Error occurred */
+
+ break;
}
- if (hole_exists)
- ret = dht_write_with_holes (to, dst, vector, count,
- ret, offset, iobref);
- else
- ret = syncop_writev (to, dst, vector, count,
- offset, iobref, 0, NULL, NULL);
- if (ret < 0) {
+ /* If the position of the current data segment is greater than
+ * the position of the next hole, find the next hole in order to
+ * calculate the length of the new data segment */
+ if (data_offset > hole_offset) {
+ /* Starting at the offset of the last data segment, find the
+ * next hole */
+ ret = syncop_seek(from, src, data_offset, GF_SEEK_HOLE,
+ NULL, &hole_offset);
+ if (ret) {
+ /* If an error occurred here it's a real error because
+ * if the seek for a data segment was successful then
+ * necessarily another hole must exist (EOF is a hole)
+ */
+ *fop_errno = -ret;
break;
- }
- offset += ret;
- total += ret;
+ }
- GF_FREE (vector);
- if (iobref)
- iobref_unref (iobref);
- iobref = NULL;
- vector = NULL;
+ /* Calculate the total size of the current data block */
+ data_block_size = hole_offset - data_offset;
+ }
+ } else {
+ /* There is still data in the current segment, move the
+ * data_offset to the position of the last written byte */
+ data_offset = offset;
+ }
+
+ /* Calculate how much data needs to be read and written. If the data
+ * segment's length is bigger than DHT_REBALANCE_BLKSIZE, read and
+ * write DHT_REBALANCE_BLKSIZE data length and the rest in the
+ * next iteration(s) */
+ read_size = ((data_block_size > DHT_REBALANCE_BLKSIZE)
+ ? DHT_REBALANCE_BLKSIZE
+ : data_block_size);
+
+ /* Calculate the remaining size of the data block - maybe there's no
+ * need to seek for data in the next iteration */
+ data_block_size -= read_size;
+
+ /* Set offset to the offset of the data segment so read and write
+ * will have the correct position */
+ offset = data_offset;
}
- if (iobref)
- iobref_unref (iobref);
- GF_FREE (vector);
- if (ret >= 0)
- ret = 0;
- else
- ret = -1;
+ ret = syncop_readv(from, src, read_size, offset, 0, &vector, &count,
+ &iobref, NULL, NULL, NULL);
- return ret;
-}
-
-
-static inline int
-__dht_rebalance_open_src_file (xlator_t *from, xlator_t *to, loc_t *loc,
- struct iatt *stbuf, fd_t **src_fd)
-{
- int ret = 0;
- fd_t *fd = NULL;
- dict_t *dict = NULL;
- xlator_t *this = NULL;
- struct iatt iatt = {0,};
- dht_conf_t *conf = NULL;
+ if (!ret || (ret < 0)) {
+ if (!ret) {
+ /* File was probably truncated*/
+ ret = -1;
+ *fop_errno = ENOSPC;
+ } else {
+ *fop_errno = -ret;
+ }
+ break;
+ }
- this = THIS;
- conf = this->private;
+ if (!conf->force_migration) {
+ if (!xdata) {
+ xdata = dict_new();
+ if (!xdata) {
+ gf_msg("dht", GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "insufficient memory");
+ ret = -1;
+ *fop_errno = ENOMEM;
+ break;
+ }
- fd = fd_create (loc->inode, DHT_REBALANCE_PID);
- if (!fd) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "%s: fd create failed (source)", loc->path);
- ret = -1;
- goto out;
+ /* Fail this write and abort rebalance if we
+ * detect a write from client since migration of
+ * this file started. This is done to avoid
+ * potential data corruption due to out of order
+ * writes from rebalance and client to the same
+ * region (as compared between src and dst
+ * files). See
+ * https://github.com/gluster/glusterfs/issues/308
+ * for more details.
+ */
+ ret = dict_set_int32_sizen(xdata, GF_AVOID_OVERWRITE, 1);
+ if (ret) {
+ gf_msg("dht", GF_LOG_ERROR, 0, ENOMEM,
+ "failed to set dict");
+ ret = -1;
+ *fop_errno = ENOMEM;
+ break;
+ }
+ }
}
- ret = syncop_open (from, loc, O_RDWR, fd, NULL, NULL);
+ ret = syncop_writev(to, dst, vector, count, offset, iobref, 0, NULL,
+ NULL, xdata, NULL);
if (ret < 0) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "failed to open file %s on %s (%s)",
- loc->path, from->name, strerror (-ret));
- ret = -1;
- goto out;
+ *fop_errno = -ret;
+ break;
}
+ offset += ret;
+ total += ret;
+
+ GF_FREE(vector);
+ if (iobref)
+ iobref_unref(iobref);
+ iobref = NULL;
+ vector = NULL;
+ }
+ if (iobref)
+ iobref_unref(iobref);
+ GF_FREE(vector);
+
+ if (ret >= 0)
+ ret = 0;
+ else
ret = -1;
- dict = dict_new ();
- if (!dict)
- goto out;
- ret = dict_set_str (dict, conf->link_xattr_name, to->name);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to set xattr in dict for %s (linkto:%s)",
- loc->path, to->name);
- goto out;
- }
+ if (xdata) {
+ dict_unref(xdata);
+ }
- /* Once the migration starts, the source should have 'linkto' key set
- to show which is the target, so other clients can work around it */
- ret = syncop_setxattr (from, loc, dict, 0, NULL, NULL);
- if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "failed to set xattr on %s in %s (%s)",
- loc->path, from->name, strerror (-ret));
- ret = -1;
- goto out;
- }
+ return ret;
+}
- /* mode should be (+S+T) to indicate migration is in progress */
- iatt.ia_prot = stbuf->ia_prot;
- iatt.ia_type = stbuf->ia_type;
- iatt.ia_prot.sticky = 1;
- iatt.ia_prot.sgid = 1;
+static int
+__dht_rebalance_open_src_file(xlator_t *this, xlator_t *from, xlator_t *to,
+ loc_t *loc, struct iatt *stbuf, fd_t **src_fd,
+ gf_boolean_t *clean_src, int *fop_errno)
+{
+ int ret = 0;
+ fd_t *fd = NULL;
+ dict_t *dict = NULL;
+ struct iatt iatt = {
+ 0,
+ };
+ dht_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ *clean_src = _gf_false;
+
+ fd = fd_create(loc->inode, DHT_REBALANCE_PID);
+ if (!fd) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: fd create failed (source)", loc->path);
+ *fop_errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+
+ ret = syncop_open(from, loc, O_RDWR, fd, NULL, NULL);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "failed to open file %s on %s", loc->path, from->name);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
+ }
- ret = syncop_setattr (from, loc, &iatt, GF_SET_ATTR_MODE, NULL, NULL,
- NULL, NULL);
- if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "failed to set mode on %s in %s (%s)",
- loc->path, from->name, strerror (-ret));
- ret = -1;
- goto out;
- }
+ fd_bind(fd);
- if (src_fd)
- *src_fd = fd;
+ if (src_fd)
+ *src_fd = fd;
- /* success */
- ret = 0;
+ ret = -1;
+ dict = dict_new();
+ if (!dict) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: Could not allocate memory for dict", loc->path);
+ *fop_errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_str(dict, conf->link_xattr_name, to->name);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to set xattr in dict for %s (linkto:%s)", loc->path,
+ to->name);
+ *fop_errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+
+ /* Once the migration starts, the source should have 'linkto' key set
+ to show which is the target, so other clients can work around it */
+ ret = syncop_setxattr(from, loc, dict, 0, NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "failed to set xattr on %s in %s", loc->path, from->name);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+
+ /* Reset source mode/xattr if migration fails*/
+ *clean_src = _gf_true;
+
+ /* mode should be (+S+T) to indicate migration is in progress */
+ iatt.ia_prot = stbuf->ia_prot;
+ iatt.ia_type = stbuf->ia_type;
+ iatt.ia_prot.sticky = 1;
+ iatt.ia_prot.sgid = 1;
+
+ ret = syncop_setattr(from, loc, &iatt, GF_SET_ATTR_MODE, NULL, NULL, NULL,
+ NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "failed to set mode on %s in %s", loc->path, from->name);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+
+ /* success */
+ ret = 0;
out:
- if (dict)
- dict_unref (dict);
+ if (dict)
+ dict_unref(dict);
- return ret;
+ return ret;
}
int
-migrate_special_files (xlator_t *this, xlator_t *from, xlator_t *to, loc_t *loc,
- struct iatt *buf)
+migrate_special_files(xlator_t *this, xlator_t *from, xlator_t *to, loc_t *loc,
+ struct iatt *buf, int *fop_errno)
{
- int ret = -1;
- dict_t *rsp_dict = NULL;
- dict_t *dict = NULL;
- char *link = NULL;
- struct iatt stbuf = {0,};
- dht_conf_t *conf = this->private;
-
- dict = dict_new ();
- if (!dict)
- goto out;
+ int ret = -1;
+ dict_t *rsp_dict = NULL;
+ dict_t *dict = NULL;
+ char *link = NULL;
+ struct iatt stbuf = {
+ 0,
+ };
+ dht_conf_t *conf = this->private;
+
+ dict = dict_new();
+ if (!dict) {
+ *fop_errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_int32(dict, conf->link_xattr_name, 256);
+ if (ret) {
+ *fop_errno = ENOMEM;
+ ret = -1;
+ gf_log(this->name, GF_LOG_ERROR,
+ "%s: failed to set 'linkto' key in dict", loc->path);
+ goto out;
+ }
+
+ /* check in the destination if the file is link file */
+ ret = syncop_lookup(to, loc, &stbuf, NULL, dict, &rsp_dict);
+ if ((ret < 0) && (-ret != ENOENT)) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: lookup failed", loc->path);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+
+ /* we no more require this key */
+ dict_del(dict, conf->link_xattr_name);
+
+ /* file exists in target node, only if it is 'linkfile' its valid,
+ otherwise, error out */
+ if (!ret) {
+ if (!check_is_linkfile(loc->inode, &stbuf, rsp_dict,
+ conf->link_xattr_name)) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: file exists in destination", loc->path);
+ *fop_errno = EINVAL;
+ ret = -1;
+ goto out;
+ }
- ret = dict_set_int32 (dict, conf->link_xattr_name, 256);
+ /* as file is linkfile, delete it */
+ ret = syncop_unlink(to, loc, NULL, NULL);
if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: failed to set 'linkto' key in dict", loc->path);
- goto out;
+ gf_msg(this->name, GF_LOG_WARNING, -ret,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: failed to delete the linkfile", loc->path);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
}
+ }
- /* check in the destination if the file is link file */
- ret = syncop_lookup (to, loc, &stbuf, NULL, dict, &rsp_dict);
- if ((ret < 0) && (-ret != ENOENT)) {
- gf_msg (this->name, GF_LOG_WARNING, 0,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "%s: lookup failed (%s)",
- loc->path, strerror (-ret));
- ret = -1;
- goto out;
+ /* Set the gfid of the source file in dict */
+ ret = dict_set_gfuuid(dict, "gfid-req", buf->ia_gfid, true);
+ if (ret) {
+ *fop_errno = ENOMEM;
+ ret = -1;
+ gf_log(this->name, GF_LOG_ERROR,
+ "%s: failed to set gfid in dict for create", loc->path);
+ goto out;
+ }
+
+ /* Create the file in target */
+ if (IA_ISLNK(buf->ia_type)) {
+ /* Handle symlinks separately */
+ ret = syncop_readlink(from, loc, &link, buf->ia_size, NULL, NULL);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: readlink on symlink failed", loc->path);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
}
- /* we no more require this key */
- dict_del (dict, conf->link_xattr_name);
+ ret = syncop_symlink(to, loc, link, 0, dict, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret,
+ DHT_MSG_MIGRATE_FILE_FAILED, "%s: creating symlink failed",
+ loc->path);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
+ }
- /* file exists in target node, only if it is 'linkfile' its valid,
- otherwise, error out */
- if (!ret) {
- if (!check_is_linkfile (loc->inode, &stbuf, rsp_dict,
- conf->link_xattr_name)) {
- gf_msg (this->name, GF_LOG_WARNING, 0,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "%s: file exists in destination", loc->path);
- ret = -1;
- goto out;
- }
+ goto done;
+ }
- /* as file is linkfile, delete it */
- ret = syncop_unlink (to, loc, NULL, NULL);
- if (ret) {
- gf_msg (this->name, GF_LOG_WARNING, 0,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "%s: failed to delete the linkfile (%s)",
- loc->path, strerror (-ret));
- ret = -1;
- goto out;
- }
- }
+ ret = syncop_mknod(to, loc, st_mode_from_ia(buf->ia_prot, buf->ia_type),
+ makedev(ia_major(buf->ia_rdev), ia_minor(buf->ia_rdev)),
+ 0, dict, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: mknod failed", loc->path);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
+ }
- /* Set the gfid of the source file in dict */
- ret = dict_set_static_bin (dict, "gfid-req", buf->ia_gfid, 16);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: failed to set gfid in dict for create", loc->path);
- goto out;
- }
+done:
+ ret = syncop_setattr(to, loc, buf,
+ (GF_SET_ATTR_MTIME | GF_SET_ATTR_UID |
+ GF_SET_ATTR_GID | GF_SET_ATTR_MODE),
+ NULL, NULL, NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: failed to perform setattr on %s", loc->path, to->name);
+ *fop_errno = -ret;
+ }
+
+ ret = syncop_unlink(from, loc, NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: unlink failed", loc->path);
+ *fop_errno = -ret;
+ ret = -1;
+ }
- /* Create the file in target */
- if (IA_ISLNK (buf->ia_type)) {
- /* Handle symlinks separately */
- ret = syncop_readlink (from, loc, &link, buf->ia_size, NULL,
- NULL);
- if (ret < 0) {
- gf_msg (this->name, GF_LOG_WARNING, 0,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "%s: readlink on symlink failed (%s)",
- loc->path, strerror (-ret));
- ret = -1;
- goto out;
- }
+out:
+ GF_FREE(link);
+ if (dict)
+ dict_unref(dict);
- ret = syncop_symlink (to, loc, link, 0, dict, NULL);
- if (ret) {
- gf_msg (this->name, GF_LOG_WARNING, 0,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "%s: creating symlink failed (%s)",
- loc->path, strerror (-ret));
- ret = -1;
- goto out;
- }
+ if (rsp_dict)
+ dict_unref(rsp_dict);
- goto done;
- }
+ return ret;
+}
- ret = syncop_mknod (to, loc, st_mode_from_ia (buf->ia_prot,
- buf->ia_type),
- makedev (ia_major (buf->ia_rdev),
- ia_minor (buf->ia_rdev)), 0, dict, NULL);
- if (ret) {
- gf_msg (this->name, GF_LOG_WARNING, 0,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "%s: mknod failed (%s)",
- loc->path, strerror (-ret));
- ret = -1;
- goto out;
- }
+static int
+__dht_migration_cleanup_src_file(xlator_t *this, loc_t *loc, fd_t *fd,
+ xlator_t *from, ia_prot_t *src_ia_prot)
+{
+ int ret = -1;
+ dht_conf_t *conf = NULL;
+ struct iatt new_stbuf = {
+ 0,
+ };
+
+ if (!this || !fd || !from || !src_ia_prot) {
+ goto out;
+ }
+
+ conf = this->private;
+
+ /*Revert source mode and xattr changes*/
+ ret = syncop_fstat(from, fd, &new_stbuf, NULL, NULL);
+ if (ret < 0) {
+ /* Failed to get the stat info */
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file cleanup failed: failed to fstat "
+ "file %s on %s ",
+ loc->path, from->name);
+ ret = -1;
+ goto out;
+ }
-done:
- ret = syncop_setattr (to, loc, buf,
- (GF_SET_ATTR_MTIME |
- GF_SET_ATTR_UID | GF_SET_ATTR_GID |
- GF_SET_ATTR_MODE), NULL, NULL, NULL, NULL);
- if (ret) {
- gf_msg (this->name, GF_LOG_WARNING, 0,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "%s: failed to perform setattr on %s (%s)",
- loc->path, to->name, strerror (-ret));
- ret = -1;
- }
+ /* Remove the sticky bit and sgid bit set, reset it to 0*/
+ if (!src_ia_prot->sticky)
+ new_stbuf.ia_prot.sticky = 0;
- ret = syncop_unlink (from, loc, NULL, NULL);
- if (ret) {
- gf_msg (this->name, GF_LOG_WARNING, 0,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "%s: unlink failed (%s)",
- loc->path, strerror (-ret));
- ret = -1;
- }
+ if (!src_ia_prot->sgid)
+ new_stbuf.ia_prot.sgid = 0;
-out:
- GF_FREE (link);
- if (dict)
- dict_unref (dict);
+ ret = syncop_fsetattr(from, fd, &new_stbuf,
+ (GF_SET_ATTR_GID | GF_SET_ATTR_MODE), NULL, NULL,
+ NULL, NULL);
- if (rsp_dict)
- dict_unref (rsp_dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file cleanup failed:"
+ "%s: failed to perform fsetattr on %s ",
+ loc->path, from->name);
+ ret = -1;
+ goto out;
+ }
+
+ ret = syncop_fremovexattr(from, fd, conf->link_xattr_name, 0, NULL);
+ if (ret) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "%s: failed to remove linkto xattr on %s (%s)", loc->path,
+ from->name, strerror(-ret));
+ ret = -1;
+ goto out;
+ }
- return ret;
+ ret = 0;
+
+out:
+ return ret;
}
/*
@@ -884,1340 +1469,3234 @@ out:
1 : not a failure, but we can't migrate data as of now
*/
int
-dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
- int flag)
+dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
+ int flag, int *fop_errno)
{
- int ret = -1;
- struct iatt new_stbuf = {0,};
- struct iatt stbuf = {0,};
- struct iatt empty_iatt = {0,};
- ia_prot_t src_ia_prot = {0,};
- fd_t *src_fd = NULL;
- fd_t *dst_fd = NULL;
- dict_t *dict = NULL;
- dict_t *xattr = NULL;
- dict_t *xattr_rsp = NULL;
- int file_has_holes = 0;
- dht_conf_t *conf = this->private;
- int rcvd_enoent_from_src = 0;
- struct gf_flock flock = {0, };
- loc_t tmp_loc = {0, };
- gf_boolean_t locked = _gf_false;
- int lk_ret = -1;
-
- gf_log (this->name, GF_LOG_INFO, "%s: attempting to move from %s to %s",
- loc->path, from->name, to->name);
-
- dict = dict_new ();
- if (!dict)
- goto out;
+ int ret = -1;
+ struct iatt new_stbuf = {
+ 0,
+ };
+ struct iatt stbuf = {
+ 0,
+ };
+ struct iatt empty_iatt = {
+ 0,
+ };
+ ia_prot_t src_ia_prot = {
+ 0,
+ };
+ fd_t *src_fd = NULL;
+ fd_t *dst_fd = NULL;
+ dict_t *dict = NULL;
+ dict_t *xattr = NULL;
+ dict_t *xattr_rsp = NULL;
+ int file_has_holes = 0;
+ dht_conf_t *conf = this->private;
+ int rcvd_enoent_from_src = 0;
+ struct gf_flock flock = {
+ 0,
+ };
+ struct gf_flock plock = {
+ 0,
+ };
+ loc_t tmp_loc = {
+ 0,
+ };
+ loc_t parent_loc = {
+ 0,
+ };
+ gf_boolean_t inodelk_locked = _gf_false;
+ gf_boolean_t entrylk_locked = _gf_false;
+ gf_boolean_t p_locked = _gf_false;
+ int lk_ret = -1;
+ gf_defrag_info_t *defrag = NULL;
+ gf_boolean_t clean_src = _gf_false;
+ gf_boolean_t clean_dst = _gf_false;
+ int log_level = GF_LOG_INFO;
+ gf_boolean_t delete_src_linkto = _gf_true;
+ lock_migration_info_t locklist;
+ dict_t *meta_dict = NULL;
+ gf_boolean_t meta_locked = _gf_false;
+ gf_boolean_t target_changed = _gf_false;
+ xlator_t *new_target = NULL;
+ xlator_t *old_target = NULL;
+ xlator_t *hashed_subvol = NULL;
+ fd_t *linkto_fd = NULL;
+ dict_t *xdata = NULL;
+
+ if (from == to) {
+ gf_msg_debug(this->name, 0,
+ "destination and source are same. file %s"
+ " might have migrated already",
+ loc->path);
+ ret = 0;
+ goto out;
+ }
+
+ gf_log(this->name, log_level, "%s: attempting to move from %s to %s",
+ loc->path, from->name, to->name);
- ret = dict_set_int32 (dict, conf->link_xattr_name, 256);
+ dict = dict_new();
+ if (!dict) {
+ ret = -1;
+ *fop_errno = ENOMEM;
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
+ "Could not allocate memory for dict");
+ goto out;
+ }
+ ret = dict_set_int32(dict, conf->link_xattr_name, 256);
+ if (ret) {
+ *fop_errno = ENOMEM;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:"
+ "%s: failed to set 'linkto' key in dict",
+ loc->path);
+ goto out;
+ }
+
+ /* Do not migrate file in case lock migration is not enabled on the
+ * volume*/
+ if (!conf->lock_migration_enabled) {
+ ret = dict_set_int32(dict, GLUSTERFS_POSIXLK_COUNT, sizeof(int32_t));
if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "Migrate file failed:"
- "%s: failed to set 'linkto' key in dict", loc->path);
- goto out;
+ *fop_errno = ENOMEM;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed: %s: failed to "
+ "set " GLUSTERFS_POSIXLK_COUNT " key in dict",
+ loc->path);
+ goto out;
+ }
+ } else {
+ gf_msg(this->name, GF_LOG_INFO, 0, 0,
+ "locks will be migrated"
+ " for file: %s",
+ loc->path);
+ }
+
+ /* The file is locked to prevent a rename during a migration. Renames
+ * and migrations on the file at the same time can lead to data loss.
+ */
+
+ ret = dht_build_parent_loc(this, &parent_loc, loc, fop_errno);
+ if (ret < 0) {
+ ret = -1;
+ gf_msg(this->name, GF_LOG_WARNING, *fop_errno,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: failed to build parent loc, which is needed to "
+ "acquire entrylk to synchronize with renames on this "
+ "path. Skipping migration",
+ loc->path);
+ goto out;
+ }
+
+ hashed_subvol = dht_subvol_get_hashed(this, loc);
+ if (hashed_subvol == NULL) {
+ ret = -1;
+ gf_msg(this->name, GF_LOG_WARNING, EINVAL, DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: cannot find hashed subvol which is needed to "
+ "synchronize with renames on this path. "
+ "Skipping migration",
+ loc->path);
+ goto out;
+ }
+
+ flock.l_type = F_WRLCK;
+
+ tmp_loc.inode = inode_ref(loc->inode);
+ gf_uuid_copy(tmp_loc.gfid, loc->gfid);
+ tmp_loc.path = gf_strdup(loc->path);
+
+ /* this inodelk happens with flock.owner being zero. But to synchronize
+ * hardlink migration we need to have different lkowner for each migration
+ * Filed a bug here: https://bugzilla.redhat.com/show_bug.cgi?id=1468202 to
+ * track the fix for this. Currently synclock takes care of synchronizing
+ * hardlink migration. Once this bug is fixed we can avoid taking synclock
+ */
+ ret = syncop_inodelk(from, DHT_FILE_MIGRATE_DOMAIN, &tmp_loc, F_SETLKW,
+ &flock, NULL, NULL);
+ if (ret < 0) {
+ *fop_errno = -ret;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_WARNING, *fop_errno,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "migrate file failed: "
+ "%s: failed to lock file on %s",
+ loc->path, from->name);
+ goto out;
+ }
+
+ inodelk_locked = _gf_true;
+
+ /* dht_rename has changed to use entrylk on hashed subvol for
+ * synchronization. So, rebalance too has to acquire an entrylk on
+ * hashed subvol.
+ */
+ ret = syncop_entrylk(hashed_subvol, DHT_ENTRY_SYNC_DOMAIN, &parent_loc,
+ loc->name, ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL, NULL);
+ if (ret < 0) {
+ *fop_errno = -ret;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_WARNING, *fop_errno,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: failed to acquire entrylk on subvol %s", loc->path,
+ hashed_subvol->name);
+ goto out;
+ }
+
+ entrylk_locked = _gf_true;
+
+ /* Phase 1 - Data migration is in progress from now on */
+ ret = syncop_lookup(from, loc, &stbuf, NULL, dict, &xattr_rsp);
+ if (ret) {
+ *fop_errno = -ret;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, *fop_errno,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:"
+ "%s: lookup failed on %s",
+ loc->path, from->name);
+ goto out;
+ }
+
+ /* preserve source mode, so set the same to the destination */
+ src_ia_prot = stbuf.ia_prot;
+
+ /* Check if file can be migrated */
+ ret = __is_file_migratable(this, loc, &stbuf, xattr_rsp, flag, defrag, conf,
+ fop_errno);
+ if (ret) {
+ if (ret == HARDLINK_MIG_INPROGRESS)
+ ret = 0;
+ goto out;
+ }
+
+ /* Take care of the special files */
+ if (!IA_ISREG(stbuf.ia_type)) {
+ /* Special files */
+ ret = migrate_special_files(this, from, to, loc, &stbuf, fop_errno);
+ goto out;
+ }
+
+ /* Try to preserve 'holes' while migrating data */
+ if (stbuf.ia_size > (stbuf.ia_blocks * GF_DISK_SECTOR_SIZE))
+ file_has_holes = 1;
+
+ /* create the destination, with required modes/xattr */
+ ret = __dht_rebalance_create_dst_file(this, to, from, loc, &stbuf, &dst_fd,
+ fop_errno, file_has_holes);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "Create dst failed"
+ " on - %s for file - %s",
+ to->name, loc->path);
+ goto out;
+ }
+
+ clean_dst = _gf_true;
+
+ ret = __dht_check_free_space(this, to, from, loc, &stbuf, flag, conf,
+ &target_changed, &new_target, fop_errno);
+ if (target_changed) {
+ /* Can't handle for hardlinks. Marking this as failure */
+ if (flag == GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS || stbuf.ia_nlink > 1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SUBVOL_INSUFF_SPACE,
+ "Exiting migration for"
+ " file - %s. flag - %d, stbuf.ia_nlink - %d",
+ loc->path, flag, stbuf.ia_nlink);
+ ret = -1;
+ goto out;
}
- flock.l_type = F_WRLCK;
+ ret = syncop_ftruncate(to, dst_fd, 0, NULL, NULL, NULL, NULL);
+ if (ret) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "%s: failed to perform truncate on %s (%s)", loc->path,
+ to->name, strerror(-ret));
+ }
- tmp_loc.inode = inode_ref (loc->inode);
- gf_uuid_copy (tmp_loc.gfid, loc->gfid);
- tmp_loc.path = gf_strdup(loc->path);
+ syncop_close(dst_fd);
+ dst_fd = NULL;
- ret = syncop_inodelk (from, DHT_FILE_MIGRATE_DOMAIN, &tmp_loc, F_SETLKW,
- &flock, NULL, NULL);
- if (ret < 0) {
- gf_msg (this->name, GF_LOG_WARNING, 0,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "migrate file failed: "
- "%s: failed to lock file on %s (%s)",
- loc->path, from->name, strerror (-ret));
- ret = -1;
- goto out;
- }
+ old_target = to;
+ to = new_target;
- locked = _gf_true;
+ clean_dst = _gf_false;
- /* Phase 1 - Data migration is in progress from now on */
- ret = syncop_lookup (from, loc, &stbuf, NULL, dict, &xattr_rsp);
+ /* if the file migration is successful to this new target, then
+ * update the xattr on the old destination to point the new
+ * destination. We need to do update this only post migration
+ * as in case of failure the linkto needs to point to the source
+ * subvol */
+ ret = __dht_rebalance_create_dst_file(
+ this, to, from, loc, &stbuf, &dst_fd, fop_errno, file_has_holes);
if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "Migrate file failed:"
- "%s: lookup failed on %s (%s)",
- loc->path, from->name, strerror (-ret));
- ret = -1;
- goto out;
+ gf_log(this->name, GF_LOG_ERROR,
+ "Create dst failed"
+ " on - %s for file - %s",
+ to->name, loc->path);
+ goto out;
+ } else {
+ gf_msg(this->name, GF_LOG_INFO, 0, 0,
+ "destination for file "
+ "- %s is changed to - %s",
+ loc->path, to->name);
+ clean_dst = _gf_true;
}
+ }
+
+ if (ret) {
+ goto out;
+ }
+
+ /* Open the source, and also update mode/xattr */
+ ret = __dht_rebalance_open_src_file(this, from, to, loc, &stbuf, &src_fd,
+ &clean_src, fop_errno);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed: failed to open %s on %s", loc->path,
+ from->name);
+ goto out;
+ }
+
+ /* TODO: move all xattr related operations to fd based operations */
+ ret = syncop_listxattr(from, loc, &xattr, NULL, NULL);
+ if (ret < 0) {
+ *fop_errno = -ret;
+ gf_msg(this->name, GF_LOG_WARNING, *fop_errno,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:"
+ "%s: failed to get xattr from %s",
+ loc->path, from->name);
+ ret = -1;
+ goto out;
+ }
+
+ /* Copying posix acls to the linkto file messes up the permissions*/
+ dht_strip_out_acls(xattr);
+
+ /* Remove the linkto xattr as we don't want to overwrite the value
+ * set on the dst.
+ */
+ dict_del(xattr, conf->link_xattr_name);
+
+ /* We need to error out if this fails as having the wrong shard xattrs
+ * set on the dst could cause data corruption
+ */
+ ret = syncop_fsetxattr(to, dst_fd, xattr, 0, NULL, NULL);
+ if (ret < 0) {
+ *fop_errno = -ret;
+ gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: failed to set xattr on %s", loc->path, to->name);
+ ret = -1;
+ goto out;
+ }
+ if (xattr_rsp) {
/* we no more require this key */
- dict_del (dict, conf->link_xattr_name);
-
- /* preserve source mode, so set the same to the destination */
- src_ia_prot = stbuf.ia_prot;
+ dict_del(dict, conf->link_xattr_name);
+ dict_unref(xattr_rsp);
+ }
+
+ ret = syncop_fstat(from, src_fd, &stbuf, dict, &xattr_rsp);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:failed to lookup %s on %s ", loc->path,
+ from->name);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+
+ /* Check again if file has hardlink */
+ ret = __check_file_has_hardlink(this, loc, &stbuf, xattr_rsp, flag, defrag,
+ conf, fop_errno);
+ if (ret) {
+ if (ret == HARDLINK_MIG_INPROGRESS)
+ ret = 0;
+ goto out;
+ }
+
+ ret = __dht_rebalance_migrate_data(this, defrag, from, to, src_fd, dst_fd,
+ stbuf.ia_size, file_has_holes,
+ fop_errno);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed: %s: failed to migrate data", loc->path);
- /* Check if file can be migrated */
- ret = __is_file_migratable (this, loc, &stbuf, xattr_rsp, flag);
- if (ret) {
- if (ret == -2)
- ret = 0;
- goto out;
- }
- /* Take care of the special files */
- if (!IA_ISREG (stbuf.ia_type)) {
- /* Special files */
- ret = migrate_special_files (this, from, to, loc, &stbuf);
- goto out;
+ ret = -1;
+ goto out;
+ }
+
+ /* TODO: Sync the locks */
+
+ xdata = dict_new();
+ if (!xdata || dict_set_int8(xdata, "last-fsync", 1)) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "%s: failed to set last-fsync flag on "
+ "%s (%s)",
+ loc->path, to->name, strerror(ENOMEM));
+ }
+
+ ret = syncop_fsync(to, dst_fd, 0, NULL, NULL, xdata, NULL);
+ if (ret) {
+ gf_log(this->name, GF_LOG_WARNING, "%s: failed to fsync on %s (%s)",
+ loc->path, to->name, strerror(-ret));
+ *fop_errno = -ret;
+ }
+
+ /* Phase 2 - Data-Migration Complete, Housekeeping updates pending */
+
+ ret = syncop_fstat(from, src_fd, &new_stbuf, NULL, NULL);
+ if (ret < 0) {
+ /* Failed to get the stat info */
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed: failed to fstat file %s on %s ", loc->path,
+ from->name);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+
+ /* Lock the entire source file to prevent clients from taking a
+ lock on it as dht_lk does not handle file migration.
+
+ This still leaves a small window where conflicting locks can
+ be granted to different clients. If client1 requests a blocking
+ lock on the src file, it will be granted after the migrating
+ process releases its lock. If client2 requests a lock on the dst
+ data file, it will also be granted, but all FOPs will be redirected
+ to the dst data file.
+ */
+
+ /* Take meta lock */
+
+ if (conf->lock_migration_enabled) {
+ meta_dict = dict_new();
+ if (!meta_dict) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "dict_new failed");
+
+ *fop_errno = ENOMEM;
+ ret = -1;
+ goto out;
}
- /* TODO: move all xattr related operations to fd based operations */
- ret = syncop_listxattr (from, loc, &xattr, NULL, NULL);
- if (ret < 0) {
- gf_msg (this->name, GF_LOG_WARNING, 0,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "Migrate file failed:"
- "%s: failed to get xattr from %s (%s)",
- loc->path, from->name, strerror (-ret));
- ret = -1;
+ ret = dict_set_str(meta_dict, GLUSTERFS_INTERNAL_FOP_KEY, "yes");
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value: key = %s,"
+ " path = %s",
+ GLUSTERFS_INTERNAL_FOP_KEY, loc->path);
+ *fop_errno = ENOMEM;
+ ret = -1;
+ goto out;
}
- /* create the destination, with required modes/xattr */
- ret = __dht_rebalance_create_dst_file (to, from, loc, &stbuf,
- dict, &dst_fd, xattr);
- if (ret)
- goto out;
-
- ret = __dht_check_free_space (to, from, loc, &stbuf, flag);
-
+ ret = dict_set_int32(meta_dict, GF_META_LOCK_KEY, 1);
if (ret) {
- goto out;
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Trace dict_set failed");
+ *fop_errno = ENOMEM;
+ ret = -1;
+ goto out;
}
- /* Open the source, and also update mode/xattr */
- ret = __dht_rebalance_open_src_file (from, to, loc, &stbuf, &src_fd);
+ ret = syncop_setxattr(from, loc, meta_dict, 0, NULL, NULL);
if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "Migrate file failed: failed to open %s on %s",
- loc->path, from->name);
- goto out;
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Trace syncop_setxattr metalock failed");
+
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
+ } else {
+ meta_locked = _gf_true;
}
+ }
+ if (!conf->lock_migration_enabled) {
+ plock.l_type = F_WRLCK;
+ plock.l_start = 0;
+ plock.l_len = 0;
+ plock.l_whence = SEEK_SET;
- ret = syncop_fstat (from, src_fd, &stbuf, NULL, NULL);
+ ret = syncop_lk(from, src_fd, F_SETLK, &plock, NULL, NULL);
if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, -ret,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "Migrate file failed:failed to lookup %s on %s ",
- loc->path, from->name);
- ret = -1;
- goto out;
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:"
+ "%s: Failed to lock on %s",
+ loc->path, from->name);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
}
- /* Try to preserve 'holes' while migrating data */
- if (stbuf.ia_size > (stbuf.ia_blocks * GF_DISK_SECTOR_SIZE))
- file_has_holes = 1;
+ p_locked = _gf_true;
+
+ } else {
+ INIT_LIST_HEAD(&locklist.list);
+
+ ret = syncop_getactivelk(from, loc, &locklist, NULL, NULL);
+ if (ret == 0) {
+ gf_log(this->name, GF_LOG_INFO, "No active locks on:%s", loc->path);
- /* All I/O happens in this function */
- ret = __dht_rebalance_migrate_data (from, to, src_fd, dst_fd,
- stbuf.ia_size, file_has_holes);
+ } else if (ret > 0) {
+ ret = syncop_setactivelk(to, loc, &locklist, NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_LOCK_MIGRATION_FAILED, "write lock failed on:%s",
+ loc->path);
+
+ *fop_errno = -ret;
+ ret = -1;
+ goto metaunlock;
+ }
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_LOCK_MIGRATION_FAILED,
+ "getactivelk failed for file: %s", loc->path);
+ *fop_errno = -ret;
+ }
+ }
+
+ /* source would have both sticky bit and sgid bit set, reset it to 0,
+ and set the source permission on destination, if it was not set
+ prior to setting rebalance-modes in source */
+ if (!src_ia_prot.sticky)
+ new_stbuf.ia_prot.sticky = 0;
+
+ if (!src_ia_prot.sgid)
+ new_stbuf.ia_prot.sgid = 0;
+
+ /* TODO: if the source actually had sticky bit, or sgid bit set,
+ we are not handling it */
+
+ ret = syncop_fsetattr(
+ to, dst_fd, &new_stbuf,
+ (GF_SET_ATTR_UID | GF_SET_ATTR_GID | GF_SET_ATTR_MODE), NULL, NULL,
+ NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:"
+ "%s: failed to perform setattr on %s ",
+ loc->path, to->name);
+ *fop_errno = -ret;
+ ret = -1;
+ goto metaunlock;
+ }
+
+ /* Because 'futimes' is not portable */
+ ret = syncop_setattr(to, loc, &new_stbuf,
+ (GF_SET_ATTR_MTIME | GF_SET_ATTR_ATIME), NULL, NULL,
+ NULL, NULL);
+ if (ret) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "%s: failed to perform setattr on %s ", loc->path, to->name);
+ *fop_errno = -ret;
+ }
+
+ if (target_changed) {
+ dict_del(dict, GLUSTERFS_POSIXLK_COUNT);
+ ret = dict_set_str(dict, conf->link_xattr_name, to->name);
if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "Migrate file failed: %s: failed to migrate data",
- loc->path);
- /* reset the destination back to 0 */
- ret = syncop_ftruncate (to, dst_fd, 0, NULL, NULL);
- if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "Migrate file failed: "
- "%s: failed to reset target size back to 0 (%s)",
- loc->path, strerror (-ret));
- }
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to set xattr in dict for %s (linkto:%s)", loc->path,
+ to->name);
+ *fop_errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+ ret = syncop_setxattr(old_target, loc, dict, 0, NULL, NULL);
+ if (ret && -ret != ESTALE && -ret != ENOENT) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "failed to set xattr on %s in %s", loc->path,
+ old_target->name);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
+ } else if (-ret == ESTALE || -ret == ENOENT) {
+ /* The failure ESTALE indicates that the linkto
+ * file on the hashed subvol might have been deleted.
+ * In this case will create a linkto file with new target
+ * as linkto xattr value*/
+ linkto_fd = fd_create(loc->inode, DHT_REBALANCE_PID);
+ if (!linkto_fd) {
+ gf_msg(this->name, GF_LOG_ERROR, errno,
+ DHT_MSG_MIGRATE_FILE_FAILED, "%s: fd create failed",
+ loc->path);
+ *fop_errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+ ret = syncop_create(old_target, loc, O_RDWR, DHT_LINKFILE_MODE,
+ linkto_fd, NULL, dict, NULL);
+ if (ret != 0 && -ret != EEXIST && -ret != ESTALE) {
+ *fop_errno = -ret;
ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "failed to create linkto file on %s in %s", loc->path,
+ old_target->name);
goto out;
+ } else if (ret == 0) {
+ ret = syncop_fsetattr(old_target, linkto_fd, &stbuf,
+ (GF_SET_ATTR_UID | GF_SET_ATTR_GID), NULL,
+ NULL, NULL, NULL);
+ if (ret < 0) {
+ *fop_errno = -ret;
+ gf_msg(this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "chown failed for %s on %s", loc->path,
+ old_target->name);
+ }
+ }
+ }
+ }
+
+ clean_dst = _gf_false;
+
+ /* Posix acls are not set on DHT linkto files as part of the initial
+ * initial xattrs set on the dst file, so these need
+ * to be set on the dst file after the linkto attrs are removed.
+ * TODO: Optimize this.
+ */
+ if (xattr) {
+ dict_unref(xattr);
+ xattr = NULL;
+ }
+
+ /* Set only the Posix ACLs this time */
+ ret = syncop_getxattr(from, loc, &xattr, POSIX_ACL_ACCESS_XATTR, NULL,
+ NULL);
+ if (ret < 0) {
+ if ((-ret != ENODATA) && (-ret != ENOATTR)) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:"
+ "%s: failed to get xattr from %s",
+ loc->path, from->name);
+ *fop_errno = -ret;
+ }
+ } else {
+ ret = syncop_setxattr(to, loc, xattr, 0, NULL, NULL);
+ if (ret < 0) {
+ /* Potential problem here where Posix ACLs will
+ * not be set on the target file */
+
+ gf_msg(this->name, GF_LOG_WARNING, -ret,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:"
+ "%s: failed to set xattr on %s",
+ loc->path, to->name);
+ *fop_errno = -ret;
+ }
+ }
+
+ /* The src file is being unlinked after this so we don't need
+ to clean it up */
+ clean_src = _gf_false;
+
+ /* Make the source as a linkfile first before deleting it */
+ empty_iatt.ia_prot.sticky = 1;
+ ret = syncop_fsetattr(from, src_fd, &empty_iatt, GF_SET_ATTR_MODE, NULL,
+ NULL, NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:"
+ "%s: failed to perform setattr on %s ",
+ loc->path, from->name);
+ *fop_errno = -ret;
+ ret = -1;
+ goto metaunlock;
+ }
+
+ /* Free up the data blocks on the source node, as the whole
+ file is migrated */
+ ret = syncop_ftruncate(from, src_fd, 0, NULL, NULL, NULL, NULL);
+ if (ret) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "%s: failed to perform truncate on %s (%s)", loc->path,
+ from->name, strerror(-ret));
+ *fop_errno = -ret;
+ }
+
+ /* remove the 'linkto' xattr from the destination */
+ ret = syncop_fremovexattr(to, dst_fd, conf->link_xattr_name, 0, NULL);
+ if (ret) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "%s: failed to perform removexattr on %s (%s)", loc->path,
+ to->name, strerror(-ret));
+ *fop_errno = -ret;
+ }
+
+ /* Do a stat and check the gfid before unlink */
+
+ /*
+ * Cached file changes its state from non-linkto to linkto file after
+ * migrating data. If lookup from any other mount-point is performed,
+ * converted-linkto-cached file will be treated as a stale and will be
+ * unlinked. But by this time, file is already migrated. So further
+ * failure because of ENOENT should not be treated as error
+ */
+
+ ret = syncop_stat(from, loc, &empty_iatt, NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: failed to do a stat on %s", loc->path, from->name);
+
+ if (-ret != ENOENT) {
+ *fop_errno = -ret;
+ ret = -1;
+ goto metaunlock;
}
- /* TODO: Sync the locks */
+ rcvd_enoent_from_src = 1;
+ }
- ret = syncop_fsync (to, dst_fd, 0, NULL, NULL);
+ if ((gf_uuid_compare(empty_iatt.ia_gfid, loc->gfid) == 0) &&
+ (!rcvd_enoent_from_src) && delete_src_linkto) {
+ /* take out the source from namespace */
+ ret = syncop_unlink(from, loc, NULL, NULL);
if (ret) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to fsync on %s (%s)",
- loc->path, to->name, strerror (-ret));
- ret = -1;
+ gf_msg(this->name, GF_LOG_WARNING, -ret,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: failed to perform unlink on %s", loc->path, from->name);
+ *fop_errno = -ret;
+ ret = -1;
+ goto metaunlock;
}
+ }
+ ret = syncop_lookup(this, loc, NULL, NULL, NULL, NULL);
+ if (ret) {
+ gf_msg_debug(this->name, -ret,
+ "%s: failed to lookup the file on subvolumes", loc->path);
+ *fop_errno = -ret;
+ }
- /* Phase 2 - Data-Migration Complete, Housekeeping updates pending */
-
- ret = syncop_fstat (from, src_fd, &new_stbuf, NULL, NULL);
- if (ret < 0) {
- /* Failed to get the stat info */
- gf_msg ( this->name, GF_LOG_ERROR, -ret,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "Migrate file failed: failed to fstat file %s on %s ",
- loc->path, from->name);
- ret = -1;
- goto out;
- }
+ gf_msg(this->name, log_level, 0, DHT_MSG_MIGRATE_FILE_COMPLETE,
+ "completed migration of %s from subvolume %s to %s", loc->path,
+ from->name, to->name);
- /* source would have both sticky bit and sgid bit set, reset it to 0,
- and set the source permission on destination, if it was not set
- prior to setting rebalance-modes in source */
- if (!src_ia_prot.sticky)
- new_stbuf.ia_prot.sticky = 0;
+ ret = 0;
- if (!src_ia_prot.sgid)
- new_stbuf.ia_prot.sgid = 0;
+metaunlock:
- /* TODO: if the source actually had sticky bit, or sgid bit set,
- we are not handling it */
+ if (conf->lock_migration_enabled && meta_locked) {
+ dict_del(meta_dict, GF_META_LOCK_KEY);
- ret = syncop_fsetattr (to, dst_fd, &new_stbuf,
- (GF_SET_ATTR_UID | GF_SET_ATTR_GID |
- GF_SET_ATTR_MODE), NULL, NULL, NULL, NULL);
+ ret = dict_set_int32(meta_dict, GF_META_UNLOCK_KEY, 1);
if (ret) {
- gf_msg (this->name, GF_LOG_WARNING, -ret,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "Migrate file failed:"
- "%s: failed to perform setattr on %s ",
- loc->path, to->name);
- ret = -1;
- goto out;
- }
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Trace dict_set failed");
- /* Because 'futimes' is not portable */
- ret = syncop_setattr (to, loc, &new_stbuf,
- (GF_SET_ATTR_MTIME | GF_SET_ATTR_ATIME),
- NULL, NULL, NULL, NULL);
- if (ret) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to perform setattr on %s ",
- loc->path, to->name);
- ret = -1;
+ *fop_errno = ENOMEM;
+ ret = -1;
+ goto out;
}
- /* Make the source as a linkfile first before deleting it */
- empty_iatt.ia_prot.sticky = 1;
- ret = syncop_fsetattr (from, src_fd, &empty_iatt,
- GF_SET_ATTR_MODE, NULL, NULL, NULL, NULL);
- if (ret) {
- gf_msg (this->name, GF_LOG_WARNING, -ret,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "Migrate file failed:"
- "%s: failed to perform setattr on %s ",
- loc->path, from->name);
- ret = -1;
- goto out;
- }
+ if (clean_dst == _gf_false)
+ ret = dict_set_int32(meta_dict, "status", 1);
+ else
+ ret = dict_set_int32(meta_dict, "status", 0);
- /* Free up the data blocks on the source node, as the whole
- file is migrated */
- ret = syncop_ftruncate (from, src_fd, 0, NULL, NULL);
if (ret) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to perform truncate on %s (%s)",
- loc->path, from->name, strerror (-ret));
- ret = -1;
- }
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Trace dict_set failed");
- /* remove the 'linkto' xattr from the destination */
- ret = syncop_fremovexattr (to, dst_fd, conf->link_xattr_name, 0, NULL);
- if (ret) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to perform removexattr on %s (%s)",
- loc->path, to->name, strerror (-ret));
- ret = -1;
+ *fop_errno = ENOMEM;
+ ret = -1;
+ goto out;
}
- /* Do a stat and check the gfid before unlink */
-
- /*
- * Cached file changes its state from non-linkto to linkto file after
- * migrating data. If lookup from any other mount-point is performed,
- * converted-linkto-cached file will be treated as a stale and will be
- * unlinked. But by this time, file is already migrated. So further
- * failure because of ENOENT should not be treated as error
- */
-
- ret = syncop_stat (from, loc, &empty_iatt, NULL, NULL);
+ ret = syncop_setxattr(from, loc, meta_dict, 0, NULL, NULL);
if (ret) {
- gf_msg (this->name, GF_LOG_WARNING, 0,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "%s: failed to do a stat on %s (%s)",
- loc->path, from->name, strerror (-ret));
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Trace syncop_setxattr meta unlock failed");
- if (-ret != ENOENT) {
- ret = -1;
- goto out;
- }
-
- rcvd_enoent_from_src = 1;
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
}
+ }
- if ((gf_uuid_compare (empty_iatt.ia_gfid, loc->gfid) == 0 ) &&
- (!rcvd_enoent_from_src)) {
- /* take out the source from namespace */
- ret = syncop_unlink (from, loc, NULL, NULL);
- if (ret) {
- gf_msg (this->name, GF_LOG_WARNING, 0,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "%s: failed to perform unlink on %s (%s)",
- loc->path, from->name, strerror (-ret));
- ret = -1;
- goto out;
- }
+out:
+ if (clean_src) {
+ /* Revert source mode and xattr changes*/
+ lk_ret = __dht_migration_cleanup_src_file(this, loc, src_fd, from,
+ &src_ia_prot);
+ if (lk_ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: failed to cleanup source file on %s", loc->path,
+ from->name);
}
-
- ret = syncop_lookup (this, loc, NULL, NULL, NULL, NULL);
- if (ret) {
- gf_msg_debug (this->name, 0,
- "%s: failed to lookup the file on subvolumes (%s)",
- loc->path, strerror (-ret));
- ret = -1;
+ }
+
+ /* reset the destination back to 0 */
+ if (clean_dst) {
+ lk_ret = syncop_ftruncate(to, dst_fd, 0, NULL, NULL, NULL, NULL);
+ if (lk_ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -lk_ret,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed: "
+ "%s: failed to reset target size back to 0",
+ loc->path);
}
+ }
- gf_msg (this->name, GF_LOG_INFO, 0,
- DHT_MSG_MIGRATE_FILE_COMPLETE,
- "completed migration of %s from subvolume %s to %s",
- loc->path, from->name, to->name);
+ if (inodelk_locked) {
+ flock.l_type = F_UNLCK;
- ret = 0;
-out:
- if (locked) {
- flock.l_type = F_UNLCK;
-
- lk_ret = syncop_inodelk (from, DHT_FILE_MIGRATE_DOMAIN,
- &tmp_loc, F_SETLK, &flock, NULL, NULL);
- if (lk_ret < 0) {
- gf_msg (this->name, GF_LOG_WARNING, 0,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "%s: failed to unlock file on %s (%s)",
- loc->path, from->name, strerror (-lk_ret));
- }
+ lk_ret = syncop_inodelk(from, DHT_FILE_MIGRATE_DOMAIN, &tmp_loc,
+ F_SETLK, &flock, NULL, NULL);
+ if (lk_ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, -lk_ret,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: failed to unlock file on %s", loc->path, from->name);
+ }
+ }
+
+ if (entrylk_locked) {
+ lk_ret = syncop_entrylk(hashed_subvol, DHT_ENTRY_SYNC_DOMAIN,
+ &parent_loc, loc->name, ENTRYLK_UNLOCK,
+ ENTRYLK_UNLOCK, NULL, NULL);
+ if (lk_ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, -lk_ret,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: failed to unlock entrylk on %s", loc->path,
+ hashed_subvol->name);
}
+ }
- if (dict)
- dict_unref (dict);
+ if (p_locked) {
+ plock.l_type = F_UNLCK;
+ lk_ret = syncop_lk(from, src_fd, F_SETLK, &plock, NULL, NULL);
- if (xattr)
- dict_unref (xattr);
- if (xattr_rsp)
- dict_unref (xattr_rsp);
+ if (lk_ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, -lk_ret,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: failed to unlock file on %s", loc->path, from->name);
+ }
+ }
- if (dst_fd)
- syncop_close (dst_fd);
- if (src_fd)
- syncop_close (src_fd);
+ lk_ret = syncop_removexattr(to, loc, GF_PROTECT_FROM_EXTERNAL_WRITES, NULL,
+ NULL);
+ if (lk_ret && (lk_ret != -ENODATA) && (lk_ret != -ENOATTR)) {
+ gf_msg(this->name, GF_LOG_WARNING, -lk_ret, 0,
+ "%s: removexattr failed key %s", loc->path,
+ GF_PROTECT_FROM_EXTERNAL_WRITES);
+ }
- loc_wipe (&tmp_loc);
+ if (dict)
+ dict_unref(dict);
- return ret;
-}
+ if (xattr)
+ dict_unref(xattr);
+ if (xattr_rsp)
+ dict_unref(xattr_rsp);
-static int
-rebalance_task (void *data)
-{
- int ret = -1;
- dht_local_t *local = NULL;
- call_frame_t *frame = NULL;
+ if (dst_fd)
+ syncop_close(dst_fd);
- frame = data;
+ if (src_fd)
+ syncop_close(src_fd);
+ if (linkto_fd)
+ syncop_close(linkto_fd);
- local = frame->local;
+ if (xdata)
+ dict_unref(xdata);
- /* This function is 'synchrounous', hence if it returns,
- we are done with the task */
- ret = dht_migrate_file (THIS, &local->loc, local->rebalance.from_subvol,
- local->rebalance.target_node, local->flags);
+ loc_wipe(&tmp_loc);
+ loc_wipe(&parent_loc);
- return ret;
+ return ret;
}
static int
-rebalance_task_completion (int op_ret, call_frame_t *sync_frame, void *data)
+rebalance_task(void *data)
{
- int ret = -1;
- uint64_t layout_int = 0;
- dht_layout_t *layout = 0;
- xlator_t *this = NULL;
- dht_local_t *local = NULL;
- int32_t op_errno = EINVAL;
-
- this = THIS;
- local = sync_frame->local;
-
- if (!op_ret) {
- /* Make sure we have valid 'layout' in inode ctx
- after the operation */
- ret = inode_ctx_del (local->loc.inode, this, &layout_int);
- if (!ret && layout_int) {
- layout = (dht_layout_t *)(long)layout_int;
- dht_layout_unref (this, layout);
- }
+ int ret = -1;
+ dht_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ int fop_errno = 0;
- ret = dht_layout_preset (this, local->rebalance.target_node,
- local->loc.inode);
- if (ret)
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to set inode ctx", local->loc.path);
- }
+ frame = data;
- if (op_ret == -1) {
- /* Failure of migration process, mostly due to write process.
- as we can't preserve the exact errno, lets say there was
- no space to migrate-data
- */
- op_errno = ENOSPC;
- }
+ local = frame->local;
- if (op_ret == 1) {
- /* migration didn't happen, but is not a failure, let the user
- understand that he doesn't have permission to migrate the
- file.
- */
- op_ret = -1;
- op_errno = EPERM;
- }
+ /* This function is 'synchrounous', hence if it returns,
+ we are done with the task */
+ ret = dht_migrate_file(THIS, &local->loc, local->rebalance.from_subvol,
+ local->rebalance.target_node, local->flags,
+ &fop_errno);
- DHT_STACK_UNWIND (setxattr, sync_frame, op_ret, op_errno, NULL);
- return 0;
+ return ret;
+}
+
+static int
+rebalance_task_completion(int op_ret, call_frame_t *sync_frame, void *data)
+{
+ int32_t op_errno = EINVAL;
+
+ if (op_ret == -1) {
+ /* Failure of migration process, mostly due to write process.
+ as we can't preserve the exact errno, lets say there was
+ no space to migrate-data
+ */
+ op_errno = ENOSPC;
+ } else if (op_ret == 1) {
+ /* migration didn't happen, but is not a failure, let the user
+ understand that he doesn't have permission to migrate the
+ file.
+ */
+ op_ret = -1;
+ op_errno = EPERM;
+ } else if (op_ret != 0) {
+ op_errno = -op_ret;
+ op_ret = -1;
+ }
+
+ DHT_STACK_UNWIND(setxattr, sync_frame, op_ret, op_errno, NULL);
+ return 0;
}
int
-dht_start_rebalance_task (xlator_t *this, call_frame_t *frame)
+dht_start_rebalance_task(xlator_t *this, call_frame_t *frame)
{
- int ret = -1;
+ int ret = -1;
- ret = synctask_new (this->ctx->env, rebalance_task,
- rebalance_task_completion,
- frame, frame);
- return ret;
+ ret = synctask_new(this->ctx->env, rebalance_task,
+ rebalance_task_completion, frame, frame);
+ return ret;
}
int
-gf_listener_stop (xlator_t *this)
+gf_listener_stop(xlator_t *this)
{
- glusterfs_ctx_t *ctx = NULL;
- cmd_args_t *cmd_args = NULL;
- int ret = 0;
-
- ctx = this->ctx;
- GF_ASSERT (ctx);
- cmd_args = &ctx->cmd_args;
- if (cmd_args->sock_file) {
- ret = unlink (cmd_args->sock_file);
- if (ret && (ENOENT == errno)) {
- ret = 0;
- }
- }
-
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Failed to unlink listener "
- "socket %s, error: %s", cmd_args->sock_file,
- strerror (errno));
+ glusterfs_ctx_t *ctx = NULL;
+ cmd_args_t *cmd_args = NULL;
+ int ret = 0;
+
+ ctx = this->ctx;
+ GF_ASSERT(ctx);
+ cmd_args = &ctx->cmd_args;
+ if (cmd_args->sock_file) {
+ ret = sys_unlink(cmd_args->sock_file);
+ if (ret && (ENOENT == errno)) {
+ ret = 0;
}
- return ret;
+ }
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, DHT_MSG_SOCKET_ERROR,
+ "Failed to unlink listener "
+ "socket %s",
+ cmd_args->sock_file);
+ }
+ return ret;
}
void
-dht_build_root_inode (xlator_t *this, inode_t **inode)
+dht_build_root_inode(xlator_t *this, inode_t **inode)
{
- inode_table_t *itable = NULL;
- uuid_t root_gfid = {0, };
+ inode_table_t *itable = NULL;
+ static uuid_t root_gfid = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
- itable = inode_table_new (0, this);
- if (!itable)
- return;
+ itable = inode_table_new(0, this);
+ if (!itable)
+ return;
- root_gfid[15] = 1;
- *inode = inode_find (itable, root_gfid);
+ *inode = inode_find(itable, root_gfid);
}
void
-dht_build_root_loc (inode_t *inode, loc_t *loc)
+dht_build_root_loc(inode_t *inode, loc_t *loc)
{
- loc->path = "/";
- loc->inode = inode;
- loc->inode->ia_type = IA_IFDIR;
- memset (loc->gfid, 0, 16);
- loc->gfid[15] = 1;
+ loc->path = "/";
+ loc->inode = inode;
+ loc->inode->ia_type = IA_IFDIR;
+ memset(loc->gfid, 0, 16);
+ loc->gfid[15] = 1;
}
-
/* return values: 1 -> error, bug ignore and continue
0 -> proceed
-1 -> error, handle it */
int32_t
-gf_defrag_handle_migrate_error (int32_t op_errno, gf_defrag_info_t *defrag)
+gf_defrag_handle_migrate_error(int32_t op_errno, gf_defrag_info_t *defrag)
{
- /* if errno is not ENOSPC or ENOTCONN, we can still continue
- with rebalance process */
- if ((op_errno != ENOSPC) || (op_errno != ENOTCONN))
- return 1;
-
- if (op_errno == ENOTCONN) {
- /* Most probably mount point went missing (mostly due
- to a brick down), say rebalance failure to user,
- let him restart it if everything is fine */
- defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
- return -1;
- }
-
- if (op_errno == ENOSPC) {
- /* rebalance process itself failed, may be
- remote brick went down, or write failed due to
- disk full etc etc.. */
- defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
- return -1;
- }
+ int ret = 0;
+ /* if errno is not ENOTCONN, we can still continue
+ with rebalance process */
+ if (op_errno != ENOTCONN) {
+ ret = 1;
+ goto out;
+ }
+
+ if (op_errno == ENOTCONN) {
+ /* Most probably mount point went missing (mostly due
+ to a brick down), say rebalance failure to user,
+ let him restart it if everything is fine */
+ defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
+ ret = -1;
+ goto out;
+ }
- return 0;
+out:
+ return ret;
}
static gf_boolean_t
-gf_defrag_pattern_match (gf_defrag_info_t *defrag, char *name, uint64_t size)
+gf_defrag_pattern_match(gf_defrag_info_t *defrag, char *name, uint64_t size)
{
- gf_defrag_pattern_list_t *trav = NULL;
- gf_boolean_t match = _gf_false;
- gf_boolean_t ret = _gf_false;
+ gf_defrag_pattern_list_t *trav = NULL;
+ gf_boolean_t match = _gf_false;
+ gf_boolean_t ret = _gf_false;
- GF_VALIDATE_OR_GOTO ("dht", defrag, out);
+ GF_VALIDATE_OR_GOTO("dht", defrag, out);
- trav = defrag->defrag_pattern;
- while (trav) {
- if (!fnmatch (trav->path_pattern, name, FNM_NOESCAPE)) {
- match = _gf_true;
- break;
- }
- trav = trav->next;
+ trav = defrag->defrag_pattern;
+ while (trav) {
+ if (!fnmatch(trav->path_pattern, name, FNM_NOESCAPE)) {
+ match = _gf_true;
+ break;
}
+ trav = trav->next;
+ }
- if ((match == _gf_true) && (size >= trav->size))
- ret = _gf_true;
+ if ((match == _gf_true) && (size >= trav->size))
+ ret = _gf_true;
- out:
- return ret;
+out:
+ return ret;
+}
+
+int
+dht_dfreaddirp_done(dht_dfoffset_ctx_t *offset_var, int cnt)
+{
+ int i;
+ int result = 1;
+
+ for (i = 0; i < cnt; i++) {
+ if (offset_var[i].readdir_done == 0) {
+ result = 0;
+ break;
+ }
+ }
+ return result;
+}
+
+int static gf_defrag_ctx_subvols_init(dht_dfoffset_ctx_t *offset_var,
+ xlator_t *this)
+{
+ int i;
+ dht_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf)
+ return -1;
+
+ for (i = 0; i < conf->local_subvols_cnt; i++) {
+ offset_var[i].this = conf->local_subvols[i];
+ offset_var[i].offset = (off_t)0;
+ offset_var[i].readdir_done = 0;
+ }
+
+ return 0;
}
-/* We do a depth first traversal of directories. But before we move into
- * subdirs, we complete the data migration of those directories whose layouts
- * have been fixed
+static int
+dht_get_first_non_null_index(subvol_nodeuuids_info_t *entry)
+{
+ int i = 0;
+ int index = 0;
+
+ for (i = 0; i < entry->count; i++) {
+ if (!gf_uuid_is_null(entry->elements[i].uuid)) {
+ index = i;
+ goto out;
+ }
+ }
+
+ if (i == entry->count) {
+ index = -1;
+ }
+out:
+ return index;
+}
+
+/* Return value
+ * 0 : this node does not migrate the file
+ * 1 : this node migrates the file
+ *
+ * Use the hash value of the gfid to determine which node will migrate files.
+ * Using the gfid instead of the name also ensures that the same node handles
+ * all hardlinks.
*/
-int
-gf_defrag_migrate_data (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
- dict_t *migrate_data)
+gf_boolean_t
+gf_defrag_should_i_migrate(xlator_t *this, int local_subvol_index, uuid_t gfid)
{
- int ret = -1;
- loc_t entry_loc = {0,};
- fd_t *fd = NULL;
- gf_dirent_t entries;
- gf_dirent_t *tmp = NULL;
- gf_dirent_t *entry = NULL;
- gf_boolean_t free_entries = _gf_false;
- off_t offset = 0;
- dict_t *dict = NULL;
- struct iatt iatt = {0,};
- int32_t op_errno = 0;
- char *uuid_str = NULL;
- uuid_t node_uuid = {0,};
- struct timeval dir_start = {0,};
- struct timeval end = {0,};
- double elapsed = {0,};
- struct timeval start = {0,};
- int loglevel = GF_LOG_TRACE;
-
- gf_log (this->name, GF_LOG_INFO, "migrate data called on %s",
- loc->path);
- gettimeofday (&dir_start, NULL);
-
- fd = fd_create (loc->inode, defrag->pid);
- if (!fd) {
- gf_log (this->name, GF_LOG_ERROR, "Failed to create fd");
- goto out;
+ gf_boolean_t ret = _gf_false;
+ int i = local_subvol_index;
+ char *str = NULL;
+ uint32_t hashval = 0;
+ int32_t index = 0;
+ dht_conf_t *conf = NULL;
+ char buf[UUID_CANONICAL_FORM_LEN + 1] = {
+ 0,
+ };
+ subvol_nodeuuids_info_t *entry = NULL;
+
+ conf = this->private;
+
+ /* Pure distribute. A subvol in this case
+ will be handled by only one node */
+
+ entry = &(conf->local_nodeuuids[i]);
+ if (entry->count == 1) {
+ return 1;
+ }
+
+ str = uuid_utoa_r(gfid, buf);
+ if (dht_hash_compute(this, 0, str, &hashval) == 0) {
+ index = (hashval % entry->count);
+ if (entry->elements[index].info == REBAL_NODEUUID_MINE) {
+ /* Index matches this node's nodeuuid.*/
+ ret = _gf_true;
+ goto out;
}
- ret = syncop_opendir (this, loc, fd, NULL, NULL);
- if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_MIGRATE_DATA_FAILED,
- "Migrate data failed: Failed to open dir %s",
- loc->path);
- ret = -1;
+ /* Brick down - some other node has to migrate these files*/
+ if (gf_uuid_is_null(entry->elements[index].uuid)) {
+ /* Fall back to the first non-null index */
+ index = dht_get_first_non_null_index(entry);
+
+ if (index == -1) {
+ /* None of the bricks in the subvol are up.
+ * CHILD_DOWN will kill the process soon */
+
+ return _gf_false;
+ }
+
+ if (entry->elements[index].info == REBAL_NODEUUID_MINE) {
+ /* Index matches this node's nodeuuid.*/
+ ret = _gf_true;
goto out;
+ }
}
+ }
+out:
+ return ret;
+}
- INIT_LIST_HEAD (&entries.list);
+int
+gf_defrag_migrate_single_file(void *opaque)
+{
+ xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+ gf_defrag_info_t *defrag = NULL;
+ int ret = 0;
+ gf_dirent_t *entry = NULL;
+ struct timeval start = {
+ 0,
+ };
+ loc_t entry_loc = {
+ 0,
+ };
+ loc_t *loc = NULL;
+ struct iatt iatt = {
+ 0,
+ };
+ dict_t *migrate_data = NULL;
+ struct timeval end = {
+ 0,
+ };
+ double elapsed = {
+ 0,
+ };
+ struct dht_container *rebal_entry = NULL;
+ inode_t *inode = NULL;
+ xlator_t *hashed_subvol = NULL;
+ xlator_t *cached_subvol = NULL;
+ call_frame_t *statfs_frame = NULL;
+ xlator_t *old_THIS = NULL;
+ data_t *tmp = NULL;
+ int fop_errno = 0;
+ gf_dht_migrate_data_type_t rebal_type = GF_DHT_MIGRATE_DATA;
+ char value[MAX_REBAL_TYPE_SIZE] = {
+ 0,
+ };
+ struct iatt *iatt_ptr = NULL;
+ gf_boolean_t update_skippedcount = _gf_true;
+ int i = 0;
+ gf_boolean_t should_i_migrate = 0;
+
+ rebal_entry = (struct dht_container *)opaque;
+ if (!rebal_entry) {
+ gf_log("DHT", GF_LOG_ERROR, "rebal_entry is NULL");
+ ret = -1;
+ goto out;
+ }
- while ((ret = syncop_readdirp (this, fd, 131072, offset, &entries,
- NULL, NULL)) != 0) {
+ this = rebal_entry->this;
- if (ret < 0) {
+ conf = this->private;
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_MIGRATE_DATA_FAILED,
- "%s: Migrate data failed: Readdir returned"
- " %s. Aborting migrate-data", loc->path,
- strerror(-ret));
- ret = -1;
- goto out;
- }
+ defrag = conf->defrag;
- if (list_empty (&entries.list))
- break;
+ loc = rebal_entry->parent_loc;
- free_entries = _gf_true;
+ migrate_data = rebal_entry->migrate_data;
- list_for_each_entry_safe (entry, tmp, &entries.list, list) {
+ entry = rebal_entry->df_entry;
+ iatt_ptr = &entry->d_stat;
- if (dict) {
- dict_unref (dict);
- dict = NULL;
- }
+ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
+ ret = -1;
+ goto out;
+ }
- if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
- ret = 1;
- goto out;
- }
+ if (defrag->stats == _gf_true) {
+ gettimeofday(&start, NULL);
+ }
- offset = entry->d_off;
+ if (defrag->defrag_pattern &&
+ (gf_defrag_pattern_match(defrag, entry->d_name,
+ entry->d_stat.ia_size) == _gf_false)) {
+ gf_log(this->name, GF_LOG_ERROR, "pattern_match failed");
+ goto out;
+ }
- if (!strcmp (entry->d_name, ".") ||
- !strcmp (entry->d_name, ".."))
- continue;
+ memset(&entry_loc, 0, sizeof(entry_loc));
- if (IA_ISDIR (entry->d_stat.ia_type))
- continue;
+ ret = dht_build_child_loc(this, &entry_loc, loc, entry->d_name);
+ if (ret) {
+ LOCK(&defrag->lock);
+ {
+ defrag->total_failures += 1;
+ }
+ UNLOCK(&defrag->lock);
- defrag->num_files_lookedup++;
- if (defrag->stats == _gf_true) {
- gettimeofday (&start, NULL);
- }
+ ret = 0;
- if (defrag->defrag_pattern &&
- (gf_defrag_pattern_match (defrag, entry->d_name,
- entry->d_stat.ia_size)
- == _gf_false)) {
- continue;
- }
+ gf_log(this->name, GF_LOG_ERROR, "Child loc build failed");
- loc_wipe (&entry_loc);
- ret =dht_build_child_loc (this, &entry_loc, loc,
- entry->d_name);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Child loc"
- " build failed");
- goto out;
- }
+ goto out;
+ }
- if (gf_uuid_is_null (entry->d_stat.ia_gfid)) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_GFID_NULL,
- "%s/%s gfid not present", loc->path,
- entry->d_name);
- continue;
- }
+ should_i_migrate = gf_defrag_should_i_migrate(
+ this, rebal_entry->local_subvol_index, entry->d_stat.ia_gfid);
- gf_uuid_copy (entry_loc.gfid, entry->d_stat.ia_gfid);
+ gf_uuid_copy(entry_loc.gfid, entry->d_stat.ia_gfid);
- if (gf_uuid_is_null (loc->gfid)) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_GFID_NULL,
- "%s/%s gfid not present", loc->path,
- entry->d_name);
- continue;
- }
+ gf_uuid_copy(entry_loc.pargfid, loc->gfid);
- gf_uuid_copy (entry_loc.pargfid, loc->gfid);
+ ret = syncop_lookup(this, &entry_loc, &iatt, NULL, NULL, NULL);
- entry_loc.inode->ia_type = entry->d_stat.ia_type;
+ if (!should_i_migrate) {
+ /* this node isn't supposed to migrate the file. suppressing any
+ * potential error from lookup as this file is under migration by
+ * another node */
+ if (ret) {
+ gf_msg_debug(this->name, -ret,
+ "Ignoring lookup failure: node isn't migrating %s",
+ entry_loc.path);
+ ret = 0;
+ }
+ gf_msg_debug(this->name, 0, "Don't migrate %s ", entry_loc.path);
+ goto out;
+ }
- ret = syncop_lookup (this, &entry_loc, &iatt, NULL,
- NULL, NULL);
- if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "Migrate file failed:%s lookup failed",
- entry_loc.path);
- ret = -1;
- continue;
- }
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed: %s lookup failed", entry_loc.path);
- ret = syncop_getxattr (this, &entry_loc, &dict,
- GF_XATTR_NODE_UUID_KEY, NULL,
- NULL);
- if(ret < 0) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "Migrate file failed:"
- "Failed to get node-uuid for %s",
- entry_loc.path);
- ret = -1;
- continue;
- }
+ /* Increase failure count only for remove-brick op, so that
+ * user is warned to check the removed-brick for any files left
+ * unmigrated
+ */
+ if (conf->decommission_subvols_cnt) {
+ LOCK(&defrag->lock);
+ {
+ defrag->total_failures += 1;
+ }
+ UNLOCK(&defrag->lock);
+ }
- ret = dict_get_str (dict, GF_XATTR_NODE_UUID_KEY,
- &uuid_str);
- if(ret < 0) {
- gf_log (this->name, GF_LOG_ERROR, "Failed to "
- "get node-uuid from dict for %s",
- entry_loc.path);
- ret = -1;
- continue;
- }
+ ret = 0;
+ goto out;
+ }
- if (gf_uuid_parse (uuid_str, node_uuid)) {
- gf_log (this->name, GF_LOG_ERROR, "gf_uuid_parse "
- "failed for %s", entry_loc.path);
- continue;
- }
+ iatt_ptr = &iatt;
- /* if file belongs to different node, skip migration
- * the other node will take responsibility of migration
- */
- if (gf_uuid_compare (node_uuid, defrag->node_uuid)) {
- gf_msg_trace (this->name, 0, "%s does not"
- "belong to this node",
- entry_loc.path);
- continue;
- }
+ hashed_subvol = dht_subvol_get_hashed(this, &entry_loc);
+ if (!hashed_subvol) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ "Failed to get hashed subvol for %s", entry_loc.path);
+ ret = 0;
+ goto out;
+ }
- uuid_str = NULL;
-
- /* if distribute is present, it will honor this key.
- * -1, ENODATA is returned if distribute is not present
- * or file doesn't have a link-file. If file has
- * link-file, the path of link-file will be the value,
- * and also that guarantees that file has to be mostly
- * migrated */
-
- ret = syncop_getxattr (this, &entry_loc, NULL,
- GF_XATTR_LINKINFO_KEY, NULL,
- NULL);
- if (ret < 0) {
- if (-ret != ENODATA) {
- loglevel = GF_LOG_ERROR;
- defrag->total_failures += 1;
- } else {
- loglevel = GF_LOG_TRACE;
- }
- gf_log (this->name, loglevel, "%s: failed to "
- "get "GF_XATTR_LINKINFO_KEY" key - %s",
- entry_loc.path, strerror (-ret));
- ret = -1;
- continue;
- }
+ cached_subvol = dht_subvol_get_cached(this, entry_loc.inode);
+ if (!cached_subvol) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_CACHED_SUBVOL_GET_FAILED,
+ "Failed to get cached subvol for %s", entry_loc.path);
- ret = syncop_setxattr (this, &entry_loc, migrate_data,
- 0, NULL, NULL);
- if (ret < 0) {
- op_errno = -ret;
- /* errno is overloaded. See
- * rebalance_task_completion () */
- if (op_errno == ENOSPC) {
- gf_msg_debug (this->name, 0,
- "migrate-data skipped for"
- " %s due to space "
- "constraints",
- entry_loc.path);
- defrag->skipped +=1;
- } else{
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "migrate-data failed for %s",
- entry_loc.path);
- defrag->total_failures +=1;
- }
-
- ret = gf_defrag_handle_migrate_error (op_errno,
- defrag);
-
- if (!ret)
- gf_msg_debug (this->name, 0,
- "migrate-data on %s "
- "failed: %s",
- entry_loc.path,
- strerror (op_errno));
- else if (ret == 1)
- continue;
- else if (ret == -1)
- goto out;
- } else if (ret > 0) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "migrate-data failed for %s",
- entry_loc.path);
- defrag->total_failures +=1;
- }
+ ret = 0;
+ goto out;
+ }
- LOCK (&defrag->lock);
+ if (hashed_subvol == cached_subvol) {
+ ret = 0;
+ goto out;
+ }
+
+ inode = inode_link(entry_loc.inode, entry_loc.parent, entry->d_name, &iatt);
+ inode_unref(entry_loc.inode);
+ /* use the inode returned by inode_link */
+ entry_loc.inode = inode;
+
+ old_THIS = THIS;
+ THIS = this;
+ statfs_frame = create_frame(this, this->ctx->pool);
+ if (!statfs_frame) {
+ gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, ENOMEM,
+ "Insufficient memory. Frame creation failed");
+ ret = -1;
+ goto out;
+ }
+
+ /* async statfs information for honoring min-free-disk */
+ dht_get_du_info(statfs_frame, this, loc);
+ THIS = old_THIS;
+
+ tmp = dict_get(migrate_data, GF_XATTR_FILE_MIGRATE_KEY);
+ if (tmp) {
+ memcpy(value, tmp->data, tmp->len);
+ if (strcmp(value, "force") == 0)
+ rebal_type = GF_DHT_MIGRATE_DATA_EVEN_IF_LINK_EXISTS;
+
+ if (conf->decommission_in_progress)
+ rebal_type = GF_DHT_MIGRATE_HARDLINK;
+ }
+
+ ret = dht_migrate_file(this, &entry_loc, cached_subvol, hashed_subvol,
+ rebal_type, &fop_errno);
+ if (ret == 1) {
+ if (fop_errno == ENOSPC) {
+ gf_msg_debug(this->name, 0,
+ "migrate-data skipped for"
+ " %s due to space constraints",
+ entry_loc.path);
+
+ /* For remove-brick case if the source is not one of the
+ * removed-brick, do not mark the error as failure */
+ if (conf->decommission_subvols_cnt) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->decommissioned_bricks[i] == cached_subvol) {
+ LOCK(&defrag->lock);
{
- defrag->total_files += 1;
- defrag->total_data += iatt.ia_size;
- }
- UNLOCK (&defrag->lock);
- if (defrag->stats == _gf_true) {
- gettimeofday (&end, NULL);
- elapsed = (end.tv_sec - start.tv_sec) * 1e6 +
- (end.tv_usec - start.tv_usec);
- gf_log (this->name, GF_LOG_INFO, "Migration of "
- "file:%s size:%"PRIu64" bytes took %.2f"
- "secs", entry_loc.path, iatt.ia_size,
- elapsed/1e6);
+ defrag->total_failures += 1;
+ update_skippedcount = _gf_false;
}
+ UNLOCK(&defrag->lock);
+
+ break;
+ }
}
+ }
- gf_dirent_free (&entries);
- free_entries = _gf_false;
- INIT_LIST_HEAD (&entries.list);
+ if (update_skippedcount) {
+ LOCK(&defrag->lock);
+ {
+ defrag->skipped += 1;
+ }
+ UNLOCK(&defrag->lock);
+
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_MIGRATE_FILE_SKIPPED,
+ "File migration skipped for %s.", entry_loc.path);
+ }
+
+ } else if (fop_errno == ENOTSUP) {
+ gf_msg_debug(this->name, 0,
+ "migrate-data skipped for"
+ " hardlink %s ",
+ entry_loc.path);
+ LOCK(&defrag->lock);
+ {
+ defrag->skipped += 1;
+ }
+ UNLOCK(&defrag->lock);
+
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_MIGRATE_FILE_SKIPPED,
+ "File migration skipped for %s.", entry_loc.path);
}
- gettimeofday (&end, NULL);
- elapsed = (end.tv_sec - dir_start.tv_sec) * 1e6 +
- (end.tv_usec - dir_start.tv_usec);
- gf_log (this->name, GF_LOG_INFO, "Migration operation on dir %s took "
- "%.2f secs", loc->path, elapsed/1e6);
ret = 0;
-out:
- if (free_entries)
- gf_dirent_free (&entries);
+ goto out;
+ } else if (ret < 0) {
+ if (fop_errno != EEXIST) {
+ gf_msg(this->name, GF_LOG_ERROR, fop_errno,
+ DHT_MSG_MIGRATE_FILE_FAILED, "migrate-data failed for %s",
+ entry_loc.path);
+
+ LOCK(&defrag->lock);
+ {
+ defrag->total_failures += 1;
+ }
+ UNLOCK(&defrag->lock);
+ }
- loc_wipe (&entry_loc);
+ ret = gf_defrag_handle_migrate_error(fop_errno, defrag);
- if (dict)
- dict_unref(dict);
+ if (!ret) {
+ gf_msg(this->name, GF_LOG_ERROR, fop_errno,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "migrate-data on %s failed:", entry_loc.path);
+ } else if (ret == 1) {
+ ret = 0;
+ }
- if (fd)
- fd_unref (fd);
- return ret;
+ goto out;
+ }
+
+ LOCK(&defrag->lock);
+ {
+ defrag->total_files += 1;
+ defrag->total_data += iatt.ia_size;
+ }
+ UNLOCK(&defrag->lock);
+
+ if (defrag->stats == _gf_true) {
+ gettimeofday(&end, NULL);
+ elapsed = gf_tvdiff(&start, &end);
+ gf_log(this->name, GF_LOG_INFO,
+ "Migration of "
+ "file:%s size:%" PRIu64
+ " bytes took %.2f"
+ "secs and ret: %d",
+ entry_loc.name, iatt.ia_size, elapsed / 1e6, ret);
+ }
+out:
+ if (statfs_frame) {
+ STACK_DESTROY(statfs_frame->root);
+ }
+
+ if (iatt_ptr) {
+ LOCK(&defrag->lock);
+ {
+ defrag->size_processed += iatt_ptr->ia_size;
+ }
+ UNLOCK(&defrag->lock);
+ }
+ loc_wipe(&entry_loc);
+
+ return ret;
}
-int
-gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
- dict_t *fix_layout, dict_t *migrate_data)
+void *
+gf_defrag_task(void *opaque)
{
- int ret = -1;
- loc_t entry_loc = {0,};
- fd_t *fd = NULL;
- gf_dirent_t entries;
- gf_dirent_t *tmp = NULL;
- gf_dirent_t *entry = NULL;
- gf_boolean_t free_entries = _gf_false;
- off_t offset = 0;
- struct iatt iatt = {0,};
- inode_t *linked_inode = NULL, *inode = NULL;
-
- ret = syncop_lookup (this, loc, &iatt, NULL, NULL, NULL);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Lookup failed on %s",
- loc->path);
- ret = -1;
- goto out;
+ struct list_head *q_head = NULL;
+ struct dht_container *iterator = NULL;
+ gf_defrag_info_t *defrag = NULL;
+ int ret = 0;
+ pid_t pid = GF_CLIENT_PID_DEFRAG;
+
+ defrag = (gf_defrag_info_t *)opaque;
+ if (!defrag) {
+ gf_msg("dht", GF_LOG_ERROR, 0, 0, "defrag is NULL");
+ goto out;
+ }
+
+ syncopctx_setfspid(&pid);
+
+ q_head = &(defrag->queue[0].list);
+
+ /* The following while loop will dequeue one entry from the defrag->queue
+ under lock. We will update the defrag->global_error only when there
+ is an error which is critical to stop the rebalance process. The stop
+ message will be intimated to other migrator threads by setting the
+ defrag->defrag_status to GF_DEFRAG_STATUS_FAILED.
+
+ In defrag->queue, a low watermark (MIN_MIGRATE_QUEUE_COUNT) is
+ maintained so that crawler does not starve the file migration
+ workers and a high watermark (MAX_MIGRATE_QUEUE_COUNT) so that
+ crawler does not go far ahead in filling up the queue.
+ */
+
+ while (_gf_true) {
+ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
+ pthread_cond_broadcast(&defrag->rebalance_crawler_alarm);
+ pthread_cond_broadcast(&defrag->parallel_migration_cond);
+ goto out;
}
- if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER) &&
- (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX)) {
- ret = gf_defrag_migrate_data (this, defrag, loc, migrate_data);
- if (ret)
- goto out;
+ pthread_mutex_lock(&defrag->dfq_mutex);
+ {
+ /*Throttle down:
+ If the reconfigured count is less than current thread
+ count, then the current thread will sleep */
+
+ /*TODO: Need to refactor the following block to work
+ *under defrag->lock. For now access
+ * defrag->current_thread_count and rthcount under
+ * dfq_mutex lock */
+ while (!defrag->crawl_done && (defrag->recon_thread_count <
+ defrag->current_thread_count)) {
+ defrag->current_thread_count--;
+ gf_msg_debug("DHT", 0,
+ "Thread sleeping. "
+ "current thread count: %d",
+ defrag->current_thread_count);
+
+ pthread_cond_wait(&defrag->df_wakeup_thread,
+ &defrag->dfq_mutex);
+
+ defrag->current_thread_count++;
+ gf_msg_debug("DHT", 0,
+ "Thread wokeup. "
+ "current thread count: %d",
+ defrag->current_thread_count);
+ }
+
+ if (defrag->q_entry_count) {
+ iterator = list_entry(q_head->next, typeof(*iterator), list);
+
+ gf_msg_debug("DHT", 0,
+ "picking entry "
+ "%s",
+ iterator->df_entry->d_name);
+
+ list_del_init(&(iterator->list));
+
+ defrag->q_entry_count--;
+
+ if ((defrag->q_entry_count < MIN_MIGRATE_QUEUE_COUNT) &&
+ defrag->wakeup_crawler) {
+ pthread_cond_broadcast(&defrag->rebalance_crawler_alarm);
+ }
+ pthread_mutex_unlock(&defrag->dfq_mutex);
+ ret = gf_defrag_migrate_single_file((void *)iterator);
+
+ /*Critical errors: ENOTCONN and ENOSPACE*/
+ if (ret) {
+ dht_set_global_defrag_error(defrag, ret);
+
+ defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
+
+ pthread_cond_broadcast(&defrag->rebalance_crawler_alarm);
+
+ pthread_cond_broadcast(&defrag->parallel_migration_cond);
+
+ goto out;
+ }
+
+ gf_defrag_free_container(iterator);
+
+ continue;
+ } else {
+ /* defrag->crawl_done flag is set means crawling
+ file system is done and hence a list_empty when
+ the above flag is set indicates there are no more
+ entries to be added to the queue and rebalance is
+ finished */
+
+ if (!defrag->crawl_done) {
+ defrag->current_thread_count--;
+ gf_msg_debug("DHT", 0,
+ "Thread "
+ "sleeping while waiting "
+ "for migration entries. "
+ "current thread count:%d",
+ defrag->current_thread_count);
+
+ pthread_cond_wait(&defrag->parallel_migration_cond,
+ &defrag->dfq_mutex);
+ }
+
+ if (defrag->crawl_done && !defrag->q_entry_count) {
+ defrag->current_thread_count++;
+ gf_msg_debug("DHT", 0, "Exiting thread");
+
+ pthread_cond_broadcast(&defrag->parallel_migration_cond);
+ goto unlock;
+ } else {
+ defrag->current_thread_count++;
+ gf_msg_debug("DHT", 0,
+ "Thread woke up"
+ " as found migrating entries. "
+ "current thread count:%d",
+ defrag->current_thread_count);
+
+ pthread_mutex_unlock(&defrag->dfq_mutex);
+ continue;
+ }
+ }
}
+ unlock:
+ pthread_mutex_unlock(&defrag->dfq_mutex);
+ break;
+ }
+out:
+ return NULL;
+}
+
+int static gf_defrag_get_entry(xlator_t *this, int i,
+ struct dht_container **container, loc_t *loc,
+ dht_conf_t *conf, gf_defrag_info_t *defrag,
+ fd_t *fd, dict_t *migrate_data,
+ struct dir_dfmeta *dir_dfmeta, dict_t *xattr_req,
+ int *perrno)
+{
+ int ret = 0;
+ char is_linkfile = 0;
+ gf_dirent_t *df_entry = NULL;
+ struct dht_container *tmp_container = NULL;
+
+ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
+ ret = -1;
+ goto out;
+ }
- gf_msg_trace (this->name, 0, "fix layout called on %s", loc->path);
+ if (dir_dfmeta->offset_var[i].readdir_done == 1) {
+ ret = 0;
+ goto out;
+ }
- fd = fd_create (loc->inode, defrag->pid);
+ if (dir_dfmeta->fetch_entries[i] == 1) {
if (!fd) {
- gf_log (this->name, GF_LOG_ERROR, "Failed to create fd");
- ret = -1;
- goto out;
+ dir_dfmeta->fetch_entries[i] = 0;
+ dir_dfmeta->offset_var[i].readdir_done = 1;
+ ret = 0;
+ goto out;
}
- ret = syncop_opendir (this, loc, fd, NULL, NULL);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Failed to open dir %s",
- loc->path);
- ret = -1;
- goto out;
+ ret = syncop_readdirp(conf->local_subvols[i], fd, 131072,
+ dir_dfmeta->offset_var[i].offset,
+ &(dir_dfmeta->equeue[i]), xattr_req, NULL);
+ if (ret == 0) {
+ dir_dfmeta->offset_var[i].readdir_done = 1;
+ ret = 0;
+ goto out;
}
- INIT_LIST_HEAD (&entries.list);
- while ((ret = syncop_readdirp (this, fd, 131072, offset, &entries,
- NULL, NULL)) != 0)
- {
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret,
+ DHT_MSG_MIGRATE_DATA_FAILED,
+ "Readdirp failed. Aborting data migration for "
+ "directory: %s",
+ loc->path);
+ *perrno = -ret;
+ ret = -1;
+ goto out;
+ }
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR, "Readdir returned %s"
- ". Aborting fix-layout",strerror(-ret));
- ret = -1;
- goto out;
- }
+ if (list_empty(&(dir_dfmeta->equeue[i].list))) {
+ dir_dfmeta->offset_var[i].readdir_done = 1;
+ ret = 0;
+ goto out;
+ }
- if (list_empty (&entries.list))
- break;
+ dir_dfmeta->fetch_entries[i] = 0;
+ }
- free_entries = _gf_true;
+ while (1) {
+ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
+ ret = -1;
+ goto out;
+ }
- list_for_each_entry_safe (entry, tmp, &entries.list, list) {
- if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
- ret = 1;
- goto out;
- }
+ df_entry = list_entry(dir_dfmeta->iterator[i]->next, typeof(*df_entry),
+ list);
- offset = entry->d_off;
+ if (&df_entry->list == dir_dfmeta->head[i]) {
+ gf_dirent_free(&(dir_dfmeta->equeue[i]));
+ INIT_LIST_HEAD(&(dir_dfmeta->equeue[i].list));
+ dir_dfmeta->fetch_entries[i] = 1;
+ dir_dfmeta->iterator[i] = dir_dfmeta->head[i];
+ ret = 0;
+ goto out;
+ }
- if (!strcmp (entry->d_name, ".") ||
- !strcmp (entry->d_name, ".."))
- continue;
+ dir_dfmeta->iterator[i] = dir_dfmeta->iterator[i]->next;
- if (!IA_ISDIR (entry->d_stat.ia_type))
- continue;
+ dir_dfmeta->offset_var[i].offset = df_entry->d_off;
+ if (!strcmp(df_entry->d_name, ".") || !strcmp(df_entry->d_name, ".."))
+ continue;
- loc_wipe (&entry_loc);
+ if (IA_ISDIR(df_entry->d_stat.ia_type)) {
+ defrag->size_processed += df_entry->d_stat.ia_size;
+ continue;
+ }
- ret =dht_build_child_loc (this, &entry_loc, loc,
- entry->d_name);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Child loc"
- " build failed");
- goto out;
- }
+ defrag->num_files_lookedup++;
- if (gf_uuid_is_null (entry->d_stat.ia_gfid)) {
- gf_log (this->name, GF_LOG_ERROR, "%s/%s"
- " gfid not present", loc->path,
- entry->d_name);
- continue;
- }
+ if (defrag->defrag_pattern &&
+ (gf_defrag_pattern_match(defrag, df_entry->d_name,
+ df_entry->d_stat.ia_size) == _gf_false)) {
+ defrag->size_processed += df_entry->d_stat.ia_size;
+ continue;
+ }
+ is_linkfile = check_is_linkfile(NULL, &df_entry->d_stat, df_entry->dict,
+ conf->link_xattr_name);
- gf_uuid_copy (entry_loc.gfid, entry->d_stat.ia_gfid);
+ if (is_linkfile) {
+ /* No need to add linkto file to the queue for
+ migration. Only the actual data file need to
+ be checked for migration criteria.
+ */
- /*In case the gfid stored in the inode by inode_link
- * and the gfid obtained in the lookup differs, then
- * client3_3_lookup_cbk will return ESTALE and proper
- * error will be captured
- */
+ gf_msg_debug(this->name, 0,
+ "Skipping linkfile"
+ " %s on subvol: %s",
+ df_entry->d_name, conf->local_subvols[i]->name);
+ continue;
+ }
- linked_inode = inode_link (entry_loc.inode, loc->inode,
- entry->d_name,
- &entry->d_stat);
+ /*Build Container Structure */
- inode = entry_loc.inode;
- entry_loc.inode = linked_inode;
- inode_unref (inode);
+ tmp_container = GF_CALLOC(1, sizeof(struct dht_container),
+ gf_dht_mt_container_t);
+ if (!tmp_container) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Failed to allocate "
+ "memory for container");
+ ret = -1;
+ goto out;
+ }
+ tmp_container->df_entry = gf_dirent_for_name(df_entry->d_name);
+ if (!tmp_container->df_entry) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Failed to allocate "
+ "memory for df_entry");
+ ret = -1;
+ goto out;
+ }
- if (gf_uuid_is_null (loc->gfid)) {
- gf_log (this->name, GF_LOG_ERROR, "%s/%s"
- " gfid not present", loc->path,
- entry->d_name);
- continue;
- }
+ tmp_container->local_subvol_index = i;
- gf_uuid_copy (entry_loc.pargfid, loc->gfid);
+ tmp_container->df_entry->d_stat = df_entry->d_stat;
- ret = syncop_lookup (this, &entry_loc, &iatt, NULL,
- NULL, NULL);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "%s"
- " lookup failed", entry_loc.path);
- ret = -1;
- continue;
- }
+ tmp_container->df_entry->d_ino = df_entry->d_ino;
- ret = syncop_setxattr (this, &entry_loc, fix_layout,
- 0, NULL, NULL);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Setxattr "
- "failed for %s", entry_loc.path);
- defrag->defrag_status =
- GF_DEFRAG_STATUS_FAILED;
- defrag->total_failures ++;
- ret = -1;
- goto out;
- }
- ret = gf_defrag_fix_layout (this, defrag, &entry_loc,
- fix_layout, migrate_data);
-
- if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_LAYOUT_FIX_FAILED,
- "Fix layout failed for %s",
- entry_loc.path);
- defrag->total_failures++;
- goto out;
- }
+ tmp_container->df_entry->d_type = df_entry->d_type;
- }
- gf_dirent_free (&entries);
- free_entries = _gf_false;
- INIT_LIST_HEAD (&entries.list);
+ tmp_container->df_entry->d_len = df_entry->d_len;
+
+ tmp_container->parent_loc = GF_CALLOC(1, sizeof(*loc), gf_dht_mt_loc_t);
+ if (!tmp_container->parent_loc) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Failed to allocate "
+ "memory for loc");
+ ret = -1;
+ goto out;
}
- ret = 0;
-out:
- if (free_entries)
- gf_dirent_free (&entries);
+ ret = loc_copy(tmp_container->parent_loc, loc);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR, "loc_copy failed");
+ ret = -1;
+ goto out;
+ }
- loc_wipe (&entry_loc);
+ tmp_container->migrate_data = migrate_data;
- if (fd)
- fd_unref (fd);
+ tmp_container->this = this;
- return ret;
+ if (df_entry->dict)
+ tmp_container->df_entry->dict = dict_ref(df_entry->dict);
+
+ /*Build Container Structure >> END*/
+
+ ret = 0;
+ goto out;
+ }
+out:
+ if (ret == 0) {
+ *container = tmp_container;
+ } else {
+ if (tmp_container) {
+ gf_defrag_free_container(tmp_container);
+ }
+ }
+
+ return ret;
}
int
-gf_defrag_start_crawl (void *data)
+gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
+ dict_t *migrate_data, int *perrno)
{
- xlator_t *this = NULL;
- dht_conf_t *conf = NULL;
- gf_defrag_info_t *defrag = NULL;
- int ret = -1;
- loc_t loc = {0,};
- struct iatt iatt = {0,};
- struct iatt parent = {0,};
- dict_t *fix_layout = NULL;
- dict_t *migrate_data = NULL;
- dict_t *status = NULL;
- glusterfs_ctx_t *ctx = NULL;
- dht_methods_t *methods = NULL;
-
- this = data;
- if (!this)
- goto out;
+ int ret = -1;
+ dht_conf_t *conf = NULL;
+ gf_dirent_t entries;
+ dict_t *xattr_req = NULL;
+ struct timeval dir_start = {
+ 0,
+ };
+ struct timeval end = {
+ 0,
+ };
+ double elapsed = {
+ 0,
+ };
+ int local_subvols_cnt = 0;
+ int i = 0;
+ int j = 0;
+ struct dht_container *container = NULL;
+ int ldfq_count = 0;
+ int dfc_index = 0;
+ int throttle_up = 0;
+ struct dir_dfmeta *dir_dfmeta = NULL;
+ xlator_t *old_THIS = NULL;
+
+ gf_log(this->name, GF_LOG_INFO, "migrate data called on %s", loc->path);
+ gettimeofday(&dir_start, NULL);
+
+ conf = this->private;
+ local_subvols_cnt = conf->local_subvols_cnt;
+
+ if (!local_subvols_cnt) {
+ ret = 0;
+ goto out;
+ }
- ctx = this->ctx;
- if (!ctx)
- goto out;
+ old_THIS = THIS;
+ THIS = this;
- conf = this->private;
- if (!conf)
- goto out;
+ dir_dfmeta = GF_CALLOC(1, sizeof(*dir_dfmeta), gf_common_mt_pointer);
+ if (!dir_dfmeta) {
+ gf_log(this->name, GF_LOG_ERROR, "dir_dfmeta is NULL");
+ ret = -1;
+ goto out;
+ }
+
+ dir_dfmeta->lfd = GF_CALLOC(local_subvols_cnt, sizeof(fd_t *),
+ gf_common_mt_pointer);
+ if (!dir_dfmeta->lfd) {
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_INSUFF_MEMORY,
+ "for dir_dfmeta", NULL);
+ ret = -1;
+ *perrno = ENOMEM;
+ goto out;
+ }
+
+ for (i = 0; i < local_subvols_cnt; i++) {
+ dir_dfmeta->lfd[i] = fd_create(loc->inode, defrag->pid);
+ if (!dir_dfmeta->lfd[i]) {
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_FD_CREATE_FAILED,
+ NULL);
+ *perrno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
- defrag = conf->defrag;
- if (!defrag)
+ ret = syncop_opendir(conf->local_subvols[i], loc, dir_dfmeta->lfd[i],
+ NULL, NULL);
+ if (ret) {
+ fd_unref(dir_dfmeta->lfd[i]);
+ dir_dfmeta->lfd[i] = NULL;
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_FAILED_TO_OPEN,
+ "dir: %s", loc->path, "subvol: %s",
+ conf->local_subvols[i]->name, NULL);
+
+ if (conf->decommission_in_progress) {
+ *perrno = -ret;
+ ret = -1;
goto out;
+ }
+ } else {
+ fd_bind(dir_dfmeta->lfd[i]);
+ }
+ }
- gettimeofday (&defrag->start_time, NULL);
- dht_build_root_inode (this, &defrag->root_inode);
- if (!defrag->root_inode)
- goto out;
+ dir_dfmeta->head = GF_CALLOC(local_subvols_cnt, sizeof(*(dir_dfmeta->head)),
+ gf_common_mt_pointer);
+ if (!dir_dfmeta->head) {
+ gf_log(this->name, GF_LOG_ERROR, "dir_dfmeta->head is NULL");
+ ret = -1;
+ goto out;
+ }
+
+ dir_dfmeta->iterator = GF_CALLOC(local_subvols_cnt,
+ sizeof(*(dir_dfmeta->iterator)),
+ gf_common_mt_pointer);
+ if (!dir_dfmeta->iterator) {
+ gf_log(this->name, GF_LOG_ERROR, "dir_dfmeta->iterator is NULL");
+ ret = -1;
+ goto out;
+ }
- dht_build_root_loc (defrag->root_inode, &loc);
+ dir_dfmeta->equeue = GF_CALLOC(local_subvols_cnt, sizeof(entries),
+ gf_dht_mt_dirent_t);
+ if (!dir_dfmeta->equeue) {
+ gf_log(this->name, GF_LOG_ERROR, "dir_dfmeta->equeue is NULL");
+ ret = -1;
+ goto out;
+ }
- /* fix-layout on '/' first */
+ dir_dfmeta->offset_var = GF_CALLOC(
+ local_subvols_cnt, sizeof(dht_dfoffset_ctx_t), gf_dht_mt_octx_t);
+ if (!dir_dfmeta->offset_var) {
+ gf_log(this->name, GF_LOG_ERROR, "dir_dfmeta->offset_var is NULL");
+ ret = -1;
+ goto out;
+ }
+
+ ret = gf_defrag_ctx_subvols_init(dir_dfmeta->offset_var, this);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "dht_dfoffset_ctx_t"
+ "initialization failed");
+ ret = -1;
+ goto out;
+ }
+
+ dir_dfmeta->fetch_entries = GF_CALLOC(local_subvols_cnt, sizeof(int),
+ gf_common_mt_int);
+ if (!dir_dfmeta->fetch_entries) {
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_INSUFF_MEMORY,
+ "for dir_dfmeta->fetch_entries", NULL);
+ ret = -1;
+ goto out;
+ }
+
+ for (i = 0; i < local_subvols_cnt; i++) {
+ INIT_LIST_HEAD(&(dir_dfmeta->equeue[i].list));
+ dir_dfmeta->head[i] = &(dir_dfmeta->equeue[i].list);
+ dir_dfmeta->iterator[i] = dir_dfmeta->head[i];
+ dir_dfmeta->fetch_entries[i] = 1;
+ }
+
+ xattr_req = dict_new();
+ if (!xattr_req) {
+ gf_log(this->name, GF_LOG_ERROR, "dict_new failed");
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_uint32(xattr_req, conf->link_xattr_name, 256);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to set dict for "
+ "key: %s",
+ conf->link_xattr_name);
+ ret = -1;
+ goto out;
+ }
- ret = syncop_lookup (this, &loc, &iatt, &parent, NULL, NULL);
+ /*
+ Job: Read entries from each local subvol and store the entries
+ in equeue array of linked list. Now pick one entry from the
+ equeue array in a round robin basis and add them to defrag Queue.
+ */
- if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_REBALANCE_START_FAILED,
- "Failed to start rebalance: look up on / failed");
- ret = -1;
- goto out;
+ while (!dht_dfreaddirp_done(dir_dfmeta->offset_var, local_subvols_cnt)) {
+ pthread_mutex_lock(&defrag->dfq_mutex);
+ {
+ /*Throttle up: If reconfigured count is higher than
+ current thread count, wake up the sleeping threads
+ TODO: Need to refactor this. Instead of making the
+ thread sleep and wake, we should terminate and spawn
+ threads on-demand*/
+
+ if (defrag->recon_thread_count > defrag->current_thread_count) {
+ throttle_up = (defrag->recon_thread_count -
+ defrag->current_thread_count);
+ for (j = 0; j < throttle_up; j++) {
+ pthread_cond_signal(&defrag->df_wakeup_thread);
+ }
+ }
+
+ while (defrag->q_entry_count > MAX_MIGRATE_QUEUE_COUNT) {
+ defrag->wakeup_crawler = 1;
+ pthread_cond_wait(&defrag->rebalance_crawler_alarm,
+ &defrag->dfq_mutex);
+ }
+
+ ldfq_count = defrag->q_entry_count;
+
+ if (defrag->wakeup_crawler) {
+ defrag->wakeup_crawler = 0;
+ }
}
+ pthread_mutex_unlock(&defrag->dfq_mutex);
+
+ while (
+ ldfq_count <= MAX_MIGRATE_QUEUE_COUNT &&
+ !dht_dfreaddirp_done(dir_dfmeta->offset_var, local_subvols_cnt)) {
+ ret = gf_defrag_get_entry(this, dfc_index, &container, loc, conf,
+ defrag, dir_dfmeta->lfd[dfc_index],
+ migrate_data, dir_dfmeta, xattr_req,
+ perrno);
+
+ if (defrag->defrag_status == GF_DEFRAG_STATUS_STOPPED) {
+ goto out;
+ }
+
+ if (ret) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "Found "
+ "error from gf_defrag_get_entry");
- fix_layout = dict_new ();
- if (!fix_layout) {
ret = -1;
goto out;
+ }
+
+ /* Check if we got an entry, else we need to move the
+ index to the next subvol */
+ if (!container) {
+ GF_CRAWL_INDEX_MOVE(dfc_index, local_subvols_cnt);
+ continue;
+ }
+
+ /* Q this entry in the dfq */
+ pthread_mutex_lock(&defrag->dfq_mutex);
+ {
+ list_add_tail(&container->list, &(defrag->queue[0].list));
+ defrag->q_entry_count++;
+ ldfq_count = defrag->q_entry_count;
+
+ gf_msg_debug(this->name, 0,
+ "added "
+ "file:%s parent:%s to the queue ",
+ container->df_entry->d_name,
+ container->parent_loc->path);
+
+ pthread_cond_signal(&defrag->parallel_migration_cond);
+ }
+ pthread_mutex_unlock(&defrag->dfq_mutex);
+
+ GF_CRAWL_INDEX_MOVE(dfc_index, local_subvols_cnt);
}
+ }
+
+ gettimeofday(&end, NULL);
+ elapsed = gf_tvdiff(&dir_start, &end);
+ gf_log(this->name, GF_LOG_INFO,
+ "Migration operation on dir %s took "
+ "%.2f secs",
+ loc->path, elapsed / 1e6);
+ ret = 0;
+out:
+ THIS = old_THIS;
+ gf_defrag_free_dir_dfmeta(dir_dfmeta, local_subvols_cnt);
+
+ if (xattr_req)
+ dict_unref(xattr_req);
+
+ /* It does not matter if it errored out - this number is
+ * used to calculate rebalance estimated time to complete.
+ * No locking required as dirs are processed by a single thread.
+ */
+ defrag->num_dirs_processed++;
+ return ret;
+}
- ret = dict_set_str (fix_layout, GF_XATTR_FIX_LAYOUT_KEY, "yes");
- if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_REBALANCE_START_FAILED,
- "Failed to start rebalance:"
- "Failed to set dictionary value: key = %s",
- GF_XATTR_FIX_LAYOUT_KEY);
+int
+gf_defrag_settle_hash(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
+ dict_t *fix_layout)
+{
+ int ret;
+ dht_conf_t *conf = NULL;
+ /*
+ * Now we're ready to update the directory commit hash for the volume
+ * root, so that hash miscompares and broadcast lookups can stop.
+ * However, we want to skip that if fix-layout is all we did. In
+ * that case, we want the miscompares etc. to continue until a real
+ * rebalance is complete.
+ */
+ if (defrag->cmd == GF_DEFRAG_CMD_START_LAYOUT_FIX ||
+ defrag->cmd == GF_DEFRAG_CMD_DETACH_START) {
+ return 0;
+ }
- goto out;
+ conf = this->private;
+ if (!conf) {
+ /*Uh oh
+ */
+ return -1;
+ }
+
+ if (conf->local_subvols_cnt == 0 || !conf->lookup_optimize) {
+ /* Commit hash updates are only done on local subvolumes and
+ * only when lookup optimization is needed (for older client
+ * support)
+ */
+ return 0;
+ }
+
+ ret = dict_set_uint32(fix_layout, "new-commit-hash",
+ defrag->new_commit_hash);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR, "Failed to set new-commit-hash");
+ return -1;
+ }
+
+ ret = syncop_setxattr(this, loc, fix_layout, 0, NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LAYOUT_FIX_FAILED,
+ "fix layout on %s failed", loc->path);
+
+ if (-ret == ENOENT || -ret == ESTALE) {
+ /* Dir most likely is deleted */
+ return 0;
}
- ret = syncop_setxattr (this, &loc, fix_layout, 0, NULL, NULL);
- if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_REBALANCE_FAILED,
- "fix layout on %s failed",
- loc.path);
+ return -1;
+ }
+
+ /* TBD: find more efficient solution than adding/deleting every time */
+ dict_del(fix_layout, "new-commit-hash");
+
+ return 0;
+}
+
+int
+gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
+ dict_t *fix_layout, dict_t *migrate_data)
+{
+ int ret = -1;
+ loc_t entry_loc = {
+ 0,
+ };
+ fd_t *fd = NULL;
+ gf_dirent_t entries;
+ gf_dirent_t *tmp = NULL;
+ gf_dirent_t *entry = NULL;
+ gf_boolean_t free_entries = _gf_false;
+ off_t offset = 0;
+ struct iatt iatt = {
+ 0,
+ };
+ inode_t *linked_inode = NULL, *inode = NULL;
+ dht_conf_t *conf = NULL;
+ int perrno = 0;
+
+ conf = this->private;
+ if (!conf) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = syncop_lookup(this, loc, &iatt, NULL, NULL, NULL);
+ if (ret) {
+ if (strcmp(loc->path, "/") == 0) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_DIR_LOOKUP_FAILED,
+ "lookup failed for:%s", loc->path);
+
+ defrag->total_failures++;
+ ret = -1;
+ goto out;
+ }
+
+ if (-ret == ENOENT || -ret == ESTALE) {
+ gf_msg(this->name, GF_LOG_INFO, -ret, DHT_MSG_DIR_LOOKUP_FAILED,
+ "Dir:%s renamed or removed. Skipping", loc->path);
+ if (conf->decommission_subvols_cnt) {
defrag->total_failures++;
- ret = -1;
- goto out;
+ }
+ ret = 0;
+ goto out;
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_DIR_LOOKUP_FAILED,
+ "lookup failed for:%s", loc->path);
+
+ defrag->total_failures++;
+ goto out;
}
+ }
- if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER) &&
- (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX)) {
- migrate_data = dict_new ();
- if (!migrate_data) {
- ret = -1;
- goto out;
+ fd = fd_create(loc->inode, defrag->pid);
+ if (!fd) {
+ gf_log(this->name, GF_LOG_ERROR, "Failed to create fd");
+ ret = -1;
+ goto out;
+ }
+
+ ret = syncop_opendir(this, loc, fd, NULL, NULL);
+ if (ret) {
+ if (-ret == ENOENT || -ret == ESTALE) {
+ if (conf->decommission_subvols_cnt) {
+ defrag->total_failures++;
+ }
+ ret = 0;
+ goto out;
+ }
+
+ gf_log(this->name, GF_LOG_ERROR,
+ "Failed to open dir %s, "
+ "err:%d",
+ loc->path, -ret);
+
+ ret = -1;
+ goto out;
+ }
+
+ fd_bind(fd);
+ INIT_LIST_HEAD(&entries.list);
+
+ while ((ret = syncop_readdirp(this, fd, 131072, offset, &entries, NULL,
+ NULL)) != 0) {
+ if (ret < 0) {
+ if (-ret == ENOENT || -ret == ESTALE) {
+ if (conf->decommission_subvols_cnt) {
+ defrag->total_failures++;
}
- if (defrag->cmd == GF_DEFRAG_CMD_START_FORCE)
- ret = dict_set_str (migrate_data,
- GF_XATTR_FILE_MIGRATE_KEY,
- "force");
- else
- ret = dict_set_str (migrate_data,
- GF_XATTR_FILE_MIGRATE_KEY,
- "non-force");
- if (ret)
- goto out;
+ ret = 0;
+ goto out;
+ }
+
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_READDIR_ERROR,
+ "readdirp failed for "
+ "path %s. Aborting fix-layout",
+ loc->path);
+
+ ret = -1;
+ goto out;
}
- ret = gf_defrag_fix_layout (this, defrag, &loc, fix_layout,
- migrate_data);
+ if (list_empty(&entries.list))
+ break;
+
+ free_entries = _gf_true;
+
+ list_for_each_entry_safe(entry, tmp, &entries.list, list)
+ {
+ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
+ ret = 1;
+ goto out;
+ }
+
+ offset = entry->d_off;
- if (defrag->cmd == GF_DEFRAG_CMD_START_TIER) {
- methods = conf->methods;
- if (!methods) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_LOG_TIER_ERROR,
- "Methods invalid for translator.");
+ if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
+ continue;
+ if (!IA_ISDIR(entry->d_stat.ia_type)) {
+ continue;
+ }
+ loc_wipe(&entry_loc);
+
+ ret = dht_build_child_loc(this, &entry_loc, loc, entry->d_name);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Child loc"
+ " build failed for entry: %s",
+ entry->d_name);
+
+ if (conf->decommission_in_progress) {
+ defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
+
+ goto out;
+ } else {
+ continue;
+ }
+ }
+
+ if (gf_uuid_is_null(entry->d_stat.ia_gfid)) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "%s/%s"
+ " gfid not present",
+ loc->path, entry->d_name);
+ continue;
+ }
+
+ gf_uuid_copy(entry_loc.gfid, entry->d_stat.ia_gfid);
+
+ /*In case the gfid stored in the inode by inode_link
+ * and the gfid obtained in the lookup differs, then
+ * client3_3_lookup_cbk will return ESTALE and proper
+ * error will be captured
+ */
+
+ linked_inode = inode_link(entry_loc.inode, loc->inode,
+ entry->d_name, &entry->d_stat);
+
+ inode = entry_loc.inode;
+ entry_loc.inode = linked_inode;
+ inode_unref(inode);
+
+ if (gf_uuid_is_null(loc->gfid)) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "%s/%s"
+ " gfid not present",
+ loc->path, entry->d_name);
+ continue;
+ }
+
+ gf_uuid_copy(entry_loc.pargfid, loc->gfid);
+
+ ret = syncop_lookup(this, &entry_loc, &iatt, NULL, NULL, NULL);
+ if (ret) {
+ if (-ret == ENOENT || -ret == ESTALE) {
+ gf_msg(this->name, GF_LOG_INFO, -ret,
+ DHT_MSG_DIR_LOOKUP_FAILED,
+ "Dir:%s renamed or removed. "
+ "Skipping",
+ loc->path);
+ ret = 0;
+ if (conf->decommission_subvols_cnt) {
+ defrag->total_failures++;
+ }
+ continue;
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_DIR_LOOKUP_FAILED, "lookup failed for:%s",
+ entry_loc.path);
+
+ defrag->total_failures++;
+
+ if (conf->decommission_in_progress) {
defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
ret = -1;
goto out;
+ } else {
+ continue;
+ }
}
- methods->migration_other(this, defrag);
- if (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER) {
- migrate_data = dict_new ();
- if (!migrate_data) {
- ret = -1;
- goto out;
- }
- ret = dict_set_str (migrate_data,
- GF_XATTR_FILE_MIGRATE_KEY,
- "force");
- if (ret)
- goto out;
-
- ret = gf_defrag_fix_layout (this, defrag, &loc,
- fix_layout,
- migrate_data);
+ }
+
+ /* A return value of 2 means, either process_dir or
+ * lookup of a dir failed. Hence, don't commit hash
+ * for the current directory*/
+
+ ret = gf_defrag_fix_layout(this, defrag, &entry_loc, fix_layout,
+ migrate_data);
+
+ if (defrag->defrag_status == GF_DEFRAG_STATUS_STOPPED ||
+ defrag->defrag_status == GF_DEFRAG_STATUS_FAILED) {
+ goto out;
+ }
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LAYOUT_FIX_FAILED,
+ "Fix layout failed for %s", entry_loc.path);
+
+ defrag->total_failures++;
+
+ if (conf->decommission_in_progress) {
+ defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
+
+ goto out;
+ } else {
+ /* Let's not commit-hash if
+ * gf_defrag_fix_layout failed*/
+ continue;
}
+ }
}
- if ((defrag->defrag_status != GF_DEFRAG_STATUS_STOPPED) &&
- (defrag->defrag_status != GF_DEFRAG_STATUS_FAILED)) {
- defrag->defrag_status = GF_DEFRAG_STATUS_COMPLETE;
+ gf_dirent_free(&entries);
+ free_entries = _gf_false;
+ INIT_LIST_HEAD(&entries.list);
+ }
+
+ /* A directory layout is fixed only after its subdirs are healed to
+ * any newly added bricks. If the layout is fixed before subdirs are
+ * healed, the newly added brick will get a non-null layout.
+ * Any subdirs which hash to that layout will no longer show up
+ * in a directory listing until they are healed.
+ */
+
+ ret = syncop_setxattr(this, loc, fix_layout, 0, NULL, NULL);
+
+ /* In case of a race where the directory is deleted just before
+ * layout setxattr, the errors are updated in the layout structure.
+ * We can use this information to make a decision whether the directory
+ * is deleted entirely.
+ */
+ if (ret == 0) {
+ ret = dht_dir_layout_error_check(this, loc->inode);
+ ret = -ret;
+ }
+
+ if (ret) {
+ if (-ret == ENOENT || -ret == ESTALE) {
+ gf_msg(this->name, GF_LOG_INFO, -ret, DHT_MSG_LAYOUT_FIX_FAILED,
+ "Setxattr failed. Dir %s "
+ "renamed or removed",
+ loc->path);
+ if (conf->decommission_subvols_cnt) {
+ defrag->total_failures++;
+ }
+ ret = 0;
+ goto out;
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LAYOUT_FIX_FAILED,
+ "Setxattr failed for %s", loc->path);
+
+ defrag->total_failures++;
+
+ if (conf->decommission_in_progress) {
+ defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
+ ret = -1;
+ goto out;
+ }
}
+ }
-out:
- LOCK (&defrag->lock);
- {
- status = dict_new ();
- gf_defrag_status_get (defrag, status);
- if (ctx && ctx->notify)
- ctx->notify (GF_EN_DEFRAG_STATUS, status);
- if (status)
- dict_unref (status);
- defrag->is_exiting = 1;
+ if (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX) {
+ ret = gf_defrag_process_dir(this, defrag, loc, migrate_data, &perrno);
+
+ if (ret) {
+ if (perrno == ENOENT || perrno == ESTALE) {
+ ret = 0;
+ goto out;
+ } else {
+ defrag->total_failures++;
+
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_DEFRAG_PROCESS_DIR_FAILED,
+ "gf_defrag_process_dir failed for "
+ "directory: %s",
+ loc->path);
+
+ if (conf->decommission_in_progress) {
+ goto out;
+ }
+ }
}
- UNLOCK (&defrag->lock);
+ }
+
+ gf_msg_trace(this->name, 0, "fix layout called on %s", loc->path);
+
+ if (gf_defrag_settle_hash(this, defrag, loc, fix_layout) != 0) {
+ defrag->total_failures++;
+
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SETTLE_HASH_FAILED,
+ "Settle hash failed for %s", loc->path);
+
+ ret = -1;
- if (defrag) {
- GF_FREE (defrag);
- conf->defrag = NULL;
+ if (conf->decommission_in_progress) {
+ defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
+ goto out;
}
+ }
+ ret = 0;
+out:
+ if (free_entries)
+ gf_dirent_free(&entries);
+
+ loc_wipe(&entry_loc);
+
+ if (fd)
+ fd_unref(fd);
+
+ return ret;
+}
+
+int
+dht_init_local_subvols_and_nodeuuids(xlator_t *this, dht_conf_t *conf,
+ loc_t *loc)
+{
+ dict_t *dict = NULL;
+ uuid_t *uuid_ptr = NULL;
+ int ret = -1;
+ int i = 0;
+ int j = 0;
+
+ /* Find local subvolumes */
+ ret = syncop_getxattr(this, loc, &dict, GF_REBAL_FIND_LOCAL_SUBVOL, NULL,
+ NULL);
+ if (ret && (ret != -ENODATA)) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, 0,
+ "local "
+ "subvolume determination failed with error: %d",
+ -ret);
+ ret = -1;
+ goto out;
+ }
+
+ if (!ret)
+ goto out;
+
+ ret = syncop_getxattr(this, loc, &dict, GF_REBAL_OLD_FIND_LOCAL_SUBVOL,
+ NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, 0,
+ "local "
+ "subvolume determination failed with error: %d",
+ -ret);
+ ret = -1;
+ goto out;
+ }
+ ret = 0;
+
+out:
+ if (ret) {
return ret;
+ }
+
+ for (i = 0; i < conf->local_subvols_cnt; i++) {
+ gf_msg(this->name, GF_LOG_INFO, 0, 0,
+ "local subvol: "
+ "%s",
+ conf->local_subvols[i]->name);
+
+ for (j = 0; j < conf->local_nodeuuids[i].count; j++) {
+ uuid_ptr = &(conf->local_nodeuuids[i].elements[j].uuid);
+ gf_msg(this->name, GF_LOG_INFO, 0, 0, "node uuid : %s",
+ uuid_utoa(*uuid_ptr));
+ }
+ }
+
+ return ret;
}
+/* Functions for the rebalance estimates feature */
-static int
-gf_defrag_done (int ret, call_frame_t *sync_frame, void *data)
+uint64_t
+gf_defrag_subvol_file_size(xlator_t *this, loc_t *root_loc)
{
- gf_listener_stop (sync_frame->this);
+ int ret = -1;
+ struct statvfs buf = {
+ 0,
+ };
+
+ ret = syncop_statfs(this, root_loc, &buf, NULL, NULL);
+ if (ret) {
+ /* Aargh! */
+ return 0;
+ }
+ return ((buf.f_blocks - buf.f_bfree) * buf.f_frsize);
+}
- STACK_DESTROY (sync_frame->root);
- kill (getpid(), SIGTERM);
+uint64_t
+gf_defrag_total_file_size(xlator_t *this, loc_t *root_loc)
+{
+ dht_conf_t *conf = NULL;
+ int i = 0;
+ uint64_t size_files = 0;
+ uint64_t total_size = 0;
+
+ conf = this->private;
+ if (!conf) {
return 0;
+ }
+
+ for (i = 0; i < conf->local_subvols_cnt; i++) {
+ size_files = gf_defrag_subvol_file_size(conf->local_subvols[i],
+ root_loc);
+ total_size += size_files;
+ gf_msg(this->name, GF_LOG_INFO, 0, 0,
+ "local subvol: %s,"
+ "cnt = %" PRIu64,
+ conf->local_subvols[i]->name, size_files);
+ }
+
+ gf_msg(this->name, GF_LOG_INFO, 0, 0, "Total size files = %" PRIu64,
+ total_size);
+
+ return total_size;
}
-void *
-gf_defrag_start (void *data)
+static void *
+dht_file_counter_thread(void *args)
{
- int ret = -1;
- call_frame_t *frame = NULL;
- dht_conf_t *conf = NULL;
- gf_defrag_info_t *defrag = NULL;
- xlator_t *this = NULL;
-
- this = data;
- conf = this->private;
- if (!conf)
- goto out;
+ gf_defrag_info_t *defrag = NULL;
+ loc_t root_loc = {
+ 0,
+ };
+ struct timespec time_to_wait = {
+ 0,
+ };
+ uint64_t tmp_size = 0;
+
+ if (!args)
+ return NULL;
- defrag = conf->defrag;
- if (!defrag)
- goto out;
+ defrag = (gf_defrag_info_t *)args;
+ dht_build_root_loc(defrag->root_inode, &root_loc);
- frame = create_frame (this, this->ctx->pool);
- if (!frame)
- goto out;
+ while (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED) {
+ timespec_now(&time_to_wait);
+ time_to_wait.tv_sec += 600;
- /* If its a tiering rebalancer mark it seperately so that CTR Xlator
- * can take appropriate action */
- if (defrag->cmd == GF_DEFRAG_CMD_START_TIER)
- frame->root->pid = GF_CLIENT_PID_TIER_DEFRAG;
- else
- frame->root->pid = GF_CLIENT_PID_DEFRAG;
+ pthread_mutex_lock(&defrag->fc_mutex);
+ pthread_cond_timedwait(&defrag->fc_wakeup_cond, &defrag->fc_mutex,
+ &time_to_wait);
+
+ pthread_mutex_unlock(&defrag->fc_mutex);
- defrag->pid = frame->root->pid;
+ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED)
+ break;
- defrag->defrag_status = GF_DEFRAG_STATUS_STARTED;
+ tmp_size = gf_defrag_total_file_size(defrag->this, &root_loc);
- ret = synctask_new (this->ctx->env, gf_defrag_start_crawl,
- gf_defrag_done, frame, this);
+ gf_log("dht", GF_LOG_INFO, "tmp data size =%" PRIu64, tmp_size);
- if (ret)
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_REBALANCE_START_FAILED,
- "Could not create task for rebalance");
+ if (!tmp_size) {
+ gf_msg("dht", GF_LOG_ERROR, 0, 0,
+ "Failed to get "
+ "the total data size. Unable to estimate "
+ "time to complete rebalance.");
+ } else {
+ g_totalsize = tmp_size;
+ gf_msg_debug("dht", 0, "total data size =%" PRIu64, g_totalsize);
+ }
+ }
+
+ return NULL;
+}
+
+int
+gf_defrag_estimates_cleanup(xlator_t *this, gf_defrag_info_t *defrag,
+ pthread_t filecnt_thread)
+{
+ int ret = -1;
+
+ /* Wake up the filecounter thread.
+ * By now the defrag status will no longer be
+ * GF_DEFRAG_STATUS_STARTED so the thread will exit the loop.
+ */
+ pthread_mutex_lock(&defrag->fc_mutex);
+ {
+ pthread_cond_broadcast(&defrag->fc_wakeup_cond);
+ }
+ pthread_mutex_unlock(&defrag->fc_mutex);
+
+ ret = pthread_join(filecnt_thread, NULL);
+ if (ret) {
+ gf_msg("dht", GF_LOG_ERROR, ret, 0,
+ "file_counter_thread: pthread_join failed.");
+ ret = -1;
+ }
+ return ret;
+}
+
+int
+gf_defrag_estimates_init(xlator_t *this, loc_t *loc, pthread_t *filecnt_thread)
+{
+ int ret = -1;
+ dht_conf_t *conf = NULL;
+ gf_defrag_info_t *defrag = NULL;
+
+ conf = this->private;
+ defrag = conf->defrag;
+
+ g_totalsize = gf_defrag_total_file_size(this, loc);
+ if (!g_totalsize) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "Failed to get "
+ "the total data size. Unable to estimate "
+ "time to complete rebalance.");
+ goto out;
+ }
+
+ ret = gf_thread_create(filecnt_thread, NULL, dht_file_counter_thread,
+ (void *)defrag, "dhtfcnt");
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ret, 0,
+ "Failed to "
+ "create the file counter thread ");
+ ret = -1;
+ goto out;
+ }
+ ret = 0;
out:
- return NULL;
+ return ret;
}
+/* Init and cleanup functions for parallel file migration*/
int
-gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict)
+gf_defrag_parallel_migration_init(xlator_t *this, gf_defrag_info_t *defrag,
+ pthread_t **tid_array, int *thread_index)
{
- int ret = 0;
- uint64_t files = 0;
- uint64_t size = 0;
- uint64_t lookup = 0;
- uint64_t failures = 0;
- uint64_t skipped = 0;
- char *status = "";
- double elapsed = 0;
- struct timeval end = {0,};
-
-
- if (!defrag)
- goto out;
+ int ret = -1;
+ int thread_spawn_count = 0;
+ int index = 0;
+ pthread_t *tid = NULL;
- ret = 0;
- if (defrag->defrag_status == GF_DEFRAG_STATUS_NOT_STARTED)
- goto out;
+ if (!defrag)
+ goto out;
+
+ /* Initialize global entry queue */
+ defrag->queue = GF_CALLOC(1, sizeof(struct dht_container),
+ gf_dht_mt_container_t);
+
+ if (!defrag->queue) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, 0,
+ "Failed to initialise migration queue");
+ ret = -1;
+ goto out;
+ }
+
+ INIT_LIST_HEAD(&(defrag->queue[0].list));
+
+ thread_spawn_count = MAX(MAX_REBAL_THREADS, 4);
+
+ gf_msg_debug(this->name, 0, "thread_spawn_count: %d", thread_spawn_count);
+
+ tid = GF_CALLOC(thread_spawn_count, sizeof(pthread_t),
+ gf_common_mt_pthread_t);
+ if (!tid) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, 0,
+ "Failed to create migration threads");
+ ret = -1;
+ goto out;
+ }
+ defrag->current_thread_count = thread_spawn_count;
+
+ /*Spawn Threads Here*/
+ while (index < thread_spawn_count) {
+ ret = gf_thread_create(&(tid[index]), NULL, gf_defrag_task,
+ (void *)defrag, "dhtmig%d", (index + 1) & 0x3ff);
+ if (ret != 0) {
+ gf_msg("DHT", GF_LOG_ERROR, ret, 0, "Thread[%d] creation failed. ",
+ index);
+ ret = -1;
+ goto out;
+ } else {
+ gf_log("DHT", GF_LOG_INFO,
+ "Thread[%d] "
+ "creation successful",
+ index);
+ }
+ index++;
+ }
+
+ ret = 0;
+out:
+ *thread_index = index;
+ *tid_array = tid;
+
+ return ret;
+}
+
+int
+gf_defrag_parallel_migration_cleanup(gf_defrag_info_t *defrag,
+ pthread_t *tid_array, int thread_index)
+{
+ int ret = -1;
+ int i = 0;
+
+ if (!defrag)
+ goto out;
+
+ /* Wake up all migration threads */
+ pthread_mutex_lock(&defrag->dfq_mutex);
+ {
+ defrag->crawl_done = 1;
+
+ pthread_cond_broadcast(&defrag->parallel_migration_cond);
+ pthread_cond_broadcast(&defrag->df_wakeup_thread);
+ }
+ pthread_mutex_unlock(&defrag->dfq_mutex);
+
+ /*Wait for all the threads to complete their task*/
+ for (i = 0; i < thread_index; i++) {
+ pthread_join(tid_array[i], NULL);
+ }
+
+ GF_FREE(tid_array);
+
+ /* Cleanup the migration queue */
+ if (defrag->queue) {
+ gf_dirent_free(defrag->queue[0].df_entry);
+ INIT_LIST_HEAD(&(defrag->queue[0].list));
+ }
+
+ GF_FREE(defrag->queue);
- files = defrag->total_files;
- size = defrag->total_data;
- lookup = defrag->num_files_lookedup;
- failures = defrag->total_failures;
- skipped = defrag->skipped;
+ ret = 0;
+out:
+ return ret;
+}
- gettimeofday (&end, NULL);
+int
+gf_defrag_start_crawl(void *data)
+{
+ xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+ gf_defrag_info_t *defrag = NULL;
+ dict_t *fix_layout = NULL;
+ dict_t *migrate_data = NULL;
+ dict_t *status = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ call_frame_t *statfs_frame = NULL;
+ xlator_t *old_THIS = NULL;
+ int ret = -1;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ struct iatt parent = {
+ 0,
+ };
+ int thread_index = 0;
+ pthread_t *tid = NULL;
+ pthread_t filecnt_thread;
+ gf_boolean_t fc_thread_started = _gf_false;
+
+ this = data;
+ if (!this)
+ goto exit;
+
+ ctx = this->ctx;
+ if (!ctx)
+ goto exit;
+
+ conf = this->private;
+ if (!conf)
+ goto exit;
+
+ defrag = conf->defrag;
+ if (!defrag)
+ goto exit;
+
+ defrag->start_time = gf_time();
+
+ dht_build_root_inode(this, &defrag->root_inode);
+ if (!defrag->root_inode)
+ goto out;
+
+ dht_build_root_loc(defrag->root_inode, &loc);
+
+ /* fix-layout on '/' first */
+
+ ret = syncop_lookup(this, &loc, &iatt, &parent, NULL, NULL);
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_REBALANCE_START_FAILED,
+ "Failed to start rebalance: look up on / failed");
+ ret = -1;
+ goto out;
+ }
+
+ old_THIS = THIS;
+ THIS = this;
- elapsed = end.tv_sec - defrag->start_time.tv_sec;
+ statfs_frame = create_frame(this, this->ctx->pool);
+ if (!statfs_frame) {
+ gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, ENOMEM,
+ "Insufficient memory. Frame creation failed");
+ ret = -1;
+ goto out;
+ }
- if (!dict)
- goto log;
+ /* async statfs update for honoring min-free-disk */
+ dht_get_du_info(statfs_frame, this, &loc);
+ THIS = old_THIS;
- ret = dict_set_uint64 (dict, "files", files);
- if (ret)
- gf_log (THIS->name, GF_LOG_WARNING,
- "failed to set file count");
+ fix_layout = dict_new();
+ if (!fix_layout) {
+ ret = -1;
+ goto out;
+ }
+
+ /*
+ * Unfortunately, we can't do special xattrs (like fix.layout) and
+ * real ones in the same call currently, and changing it seems
+ * riskier than just doing two calls.
+ */
+
+ gf_log(this->name, GF_LOG_INFO, "%s using commit hash %u", __func__,
+ conf->vol_commit_hash);
+
+ ret = dict_set_uint32(fix_layout, conf->commithash_xattr_name,
+ conf->vol_commit_hash);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR, "Failed to set %s",
+ conf->commithash_xattr_name);
+ defrag->total_failures++;
+ ret = -1;
+ goto out;
+ }
+
+ ret = syncop_setxattr(this, &loc, fix_layout, 0, NULL, NULL);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Failed to set commit hash on %s. "
+ "Rebalance cannot proceed.",
+ loc.path);
+ defrag->total_failures++;
+ ret = -1;
+ goto out;
+ }
+
+ /* We now return to our regularly scheduled program. */
+
+ ret = dict_set_str(fix_layout, GF_XATTR_FIX_LAYOUT_KEY, "yes");
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_REBALANCE_START_FAILED,
+ "Failed to start rebalance:"
+ "Failed to set dictionary value: key = %s",
+ GF_XATTR_FIX_LAYOUT_KEY);
+ defrag->total_failures++;
+ ret = -1;
+ goto out;
+ }
- ret = dict_set_uint64 (dict, "size", size);
- if (ret)
- gf_log (THIS->name, GF_LOG_WARNING,
- "failed to set size of xfer");
+ defrag->new_commit_hash = conf->vol_commit_hash;
- ret = dict_set_uint64 (dict, "lookups", lookup);
- if (ret)
- gf_log (THIS->name, GF_LOG_WARNING,
- "failed to set lookedup file count");
+ ret = syncop_setxattr(this, &loc, fix_layout, 0, NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_REBALANCE_FAILED,
+ "fix layout on %s failed", loc.path);
+ defrag->total_failures++;
+ ret = -1;
+ goto out;
+ }
+ if (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX) {
+ /* We need to migrate files */
- ret = dict_set_int32 (dict, "status", defrag->defrag_status);
- if (ret)
- gf_log (THIS->name, GF_LOG_WARNING,
- "failed to set status");
- if (elapsed) {
- ret = dict_set_double (dict, "run-time", elapsed);
- if (ret)
- gf_log (THIS->name, GF_LOG_WARNING,
- "failed to set run-time");
+ migrate_data = dict_new();
+ if (!migrate_data) {
+ defrag->total_failures++;
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_str(
+ migrate_data, GF_XATTR_FILE_MIGRATE_KEY,
+ (defrag->cmd == GF_DEFRAG_CMD_START_FORCE) ? "force" : "non-force");
+ if (ret) {
+ defrag->total_failures++;
+ ret = -1;
+ goto out;
}
- ret = dict_set_uint64 (dict, "failures", failures);
- if (ret)
- gf_log (THIS->name, GF_LOG_WARNING,
- "failed to set failure count");
+ ret = dht_init_local_subvols_and_nodeuuids(this, conf, &loc);
+ if (ret) {
+ ret = -1;
+ goto out;
+ }
- ret = dict_set_uint64 (dict, "skipped", skipped);
- if (ret)
- gf_log (THIS->name, GF_LOG_WARNING,
- "failed to set skipped file count");
-log:
- switch (defrag->defrag_status) {
- case GF_DEFRAG_STATUS_NOT_STARTED:
- status = "not started";
- break;
- case GF_DEFRAG_STATUS_STARTED:
- status = "in progress";
- break;
- case GF_DEFRAG_STATUS_STOPPED:
- status = "stopped";
- break;
- case GF_DEFRAG_STATUS_COMPLETE:
- status = "completed";
- break;
- case GF_DEFRAG_STATUS_FAILED:
- status = "failed";
- break;
- default:
- break;
+ /* Initialise the structures required for parallel migration */
+ ret = gf_defrag_parallel_migration_init(this, defrag, &tid,
+ &thread_index);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "Aborting rebalance.");
+ goto out;
+ }
+
+ ret = gf_defrag_estimates_init(this, &loc, &filecnt_thread);
+ if (ret) {
+ /* Not a fatal error. Allow the rebalance to proceed*/
+ ret = 0;
+ } else {
+ fc_thread_started = _gf_true;
}
+ }
- gf_msg (THIS->name, GF_LOG_INFO, 0, DHT_MSG_REBALANCE_STATUS,
- "Rebalance is %s. Time taken is %.2f secs",
- status, elapsed);
- gf_msg (THIS->name, GF_LOG_INFO, 0, DHT_MSG_REBALANCE_STATUS,
- "Files migrated: %"PRIu64", size: %"
- PRIu64", lookups: %"PRIu64", failures: %"PRIu64", skipped: "
- "%"PRIu64, files, size, lookup, failures, skipped);
+ ret = gf_defrag_fix_layout(this, defrag, &loc, fix_layout, migrate_data);
+ if (ret) {
+ defrag->total_failures++;
+ ret = -1;
+ goto out;
+ }
+ if (gf_defrag_settle_hash(this, defrag, &loc, fix_layout) != 0) {
+ defrag->total_failures++;
+ ret = -1;
+ goto out;
+ }
+ gf_log("DHT", GF_LOG_INFO, "crawling file-system completed");
out:
- return 0;
+
+ /* We are here means crawling the entire file system is done
+ or something failed. Set defrag->crawl_done flag to intimate
+ the migrator threads to exhaust the defrag->queue and terminate*/
+
+ if (ret) {
+ defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
+ }
+
+ gf_defrag_parallel_migration_cleanup(defrag, tid, thread_index);
+
+ if ((defrag->defrag_status != GF_DEFRAG_STATUS_STOPPED) &&
+ (defrag->defrag_status != GF_DEFRAG_STATUS_FAILED)) {
+ defrag->defrag_status = GF_DEFRAG_STATUS_COMPLETE;
+ }
+
+ if (fc_thread_started) {
+ gf_defrag_estimates_cleanup(this, defrag, filecnt_thread);
+ }
+
+ dht_send_rebalance_event(this, defrag->cmd, defrag->defrag_status);
+
+ status = dict_new();
+ LOCK(&defrag->lock);
+ {
+ gf_defrag_status_get(conf, status);
+ if (ctx && ctx->notify)
+ ctx->notify(GF_EN_DEFRAG_STATUS, status);
+ if (status)
+ dict_unref(status);
+ defrag->is_exiting = 1;
+ }
+ UNLOCK(&defrag->lock);
+
+ GF_FREE(defrag);
+ conf->defrag = NULL;
+
+ if (migrate_data)
+ dict_unref(migrate_data);
+
+ if (statfs_frame) {
+ STACK_DESTROY(statfs_frame->root);
+ }
+exit:
+ return ret;
+}
+
+static int
+gf_defrag_done(int ret, call_frame_t *sync_frame, void *data)
+{
+ gf_listener_stop(sync_frame->this);
+
+ STACK_DESTROY(sync_frame->root);
+ kill(getpid(), SIGTERM);
+ return 0;
+}
+
+void *
+gf_defrag_start(void *data)
+{
+ int ret = -1;
+ call_frame_t *frame = NULL;
+ dht_conf_t *conf = NULL;
+ gf_defrag_info_t *defrag = NULL;
+ xlator_t *this = NULL;
+ xlator_t *old_THIS = NULL;
+
+ this = data;
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ defrag = conf->defrag;
+ if (!defrag)
+ goto out;
+
+ frame = create_frame(this, this->ctx->pool);
+ if (!frame)
+ goto out;
+
+ frame->root->pid = GF_CLIENT_PID_DEFRAG;
+
+ defrag->pid = frame->root->pid;
+
+ defrag->defrag_status = GF_DEFRAG_STATUS_STARTED;
+
+ old_THIS = THIS;
+ THIS = this;
+ ret = synctask_new(this->ctx->env, gf_defrag_start_crawl, gf_defrag_done,
+ frame, this);
+
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_REBALANCE_START_FAILED,
+ "Could not create task for rebalance");
+ THIS = old_THIS;
+out:
+ return NULL;
+}
+
+uint64_t
+gf_defrag_get_estimates_based_on_size(dht_conf_t *conf)
+{
+ gf_defrag_info_t *defrag = NULL;
+ double rate_processed = 0;
+ uint64_t total_processed = 0;
+ uint64_t tmp_count = 0;
+ uint64_t time_to_complete = 0;
+ double elapsed = 0;
+
+ defrag = conf->defrag;
+
+ if (!g_totalsize)
+ goto out;
+
+ elapsed = gf_time() - defrag->start_time;
+
+ /* Don't calculate the estimates for the first 10 minutes.
+ * It is unlikely to be accurate and estimates are not required
+ * if the process finishes in less than 10 mins.
+ */
+
+ if (elapsed < ESTIMATE_START_INTERVAL) {
+ gf_msg(THIS->name, GF_LOG_INFO, 0, 0,
+ "Rebalance estimates will not be available for the "
+ "first %d seconds.",
+ ESTIMATE_START_INTERVAL);
+
+ goto out;
+ }
+
+ total_processed = defrag->size_processed;
+
+ /* rate at which files processed */
+ rate_processed = (total_processed) / elapsed;
+
+ tmp_count = g_totalsize;
+
+ if (rate_processed) {
+ time_to_complete = (tmp_count) / rate_processed;
+
+ } else {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, 0,
+ "Unable to calculate estimated time for rebalance");
+ }
+
+ gf_log(THIS->name, GF_LOG_INFO,
+ "TIME: (size) total_processed=%" PRIu64 " tmp_cnt = %" PRIu64
+ ","
+ "rate_processed=%f, elapsed = %f",
+ total_processed, tmp_count, rate_processed, elapsed);
+
+out:
+ return time_to_complete;
}
int
-gf_defrag_start_detach_tier (gf_defrag_info_t *defrag)
+gf_defrag_status_get(dht_conf_t *conf, dict_t *dict)
{
- defrag->cmd = GF_DEFRAG_CMD_START_DETACH_TIER;
+ int ret = 0;
+ uint64_t files = 0;
+ uint64_t size = 0;
+ uint64_t lookup = 0;
+ uint64_t failures = 0;
+ uint64_t skipped = 0;
+ char *status = "";
+ double elapsed = 0;
+ uint64_t time_to_complete = 0;
+ uint64_t time_left = 0;
+ gf_defrag_info_t *defrag = conf->defrag;
+
+ if (!defrag)
+ goto out;
+
+ ret = 0;
+ if (defrag->defrag_status == GF_DEFRAG_STATUS_NOT_STARTED)
+ goto out;
+
+ files = defrag->total_files;
+ size = defrag->total_data;
+ lookup = defrag->num_files_lookedup;
+ failures = defrag->total_failures;
+ skipped = defrag->skipped;
+
+ elapsed = gf_time() - defrag->start_time;
+
+ /* The rebalance is still in progress */
+
+ if (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED) {
+ time_to_complete = gf_defrag_get_estimates_based_on_size(conf);
+
+ if (time_to_complete && (time_to_complete > elapsed))
+ time_left = time_to_complete - elapsed;
+
+ gf_log(THIS->name, GF_LOG_INFO,
+ "TIME: Estimated total time to complete (size)= %" PRIu64
+ " seconds, seconds left = %" PRIu64 "",
+ time_to_complete, time_left);
+ }
+
+ if (!dict)
+ goto log;
+
+ ret = dict_set_uint64(dict, "files", files);
+ if (ret)
+ gf_log(THIS->name, GF_LOG_WARNING, "failed to set file count");
+
+ ret = dict_set_uint64(dict, "size", size);
+ if (ret)
+ gf_log(THIS->name, GF_LOG_WARNING, "failed to set size of xfer");
+
+ ret = dict_set_uint64(dict, "lookups", lookup);
+ if (ret)
+ gf_log(THIS->name, GF_LOG_WARNING, "failed to set lookedup file count");
+
+ ret = dict_set_int32(dict, "status", defrag->defrag_status);
+ if (ret)
+ gf_log(THIS->name, GF_LOG_WARNING, "failed to set status");
+
+ ret = dict_set_double(dict, "run-time", elapsed);
+ if (ret)
+ gf_log(THIS->name, GF_LOG_WARNING, "failed to set run-time");
+
+ ret = dict_set_uint64(dict, "failures", failures);
+ if (ret)
+ gf_log(THIS->name, GF_LOG_WARNING, "failed to set failure count");
+
+ ret = dict_set_uint64(dict, "skipped", skipped);
+ if (ret)
+ gf_log(THIS->name, GF_LOG_WARNING, "failed to set skipped file count");
+
+ ret = dict_set_uint64(dict, "time-left", time_left);
+ if (ret)
+ gf_log(THIS->name, GF_LOG_WARNING, "failed to set time-left");
- return 0;
+log:
+ switch (defrag->defrag_status) {
+ case GF_DEFRAG_STATUS_NOT_STARTED:
+ status = "not started";
+ break;
+ case GF_DEFRAG_STATUS_STARTED:
+ status = "in progress";
+ break;
+ case GF_DEFRAG_STATUS_STOPPED:
+ status = "stopped";
+ break;
+ case GF_DEFRAG_STATUS_COMPLETE:
+ status = "completed";
+ break;
+ case GF_DEFRAG_STATUS_FAILED:
+ status = "failed";
+ break;
+ default:
+ break;
+ }
+
+ gf_msg(THIS->name, GF_LOG_INFO, 0, DHT_MSG_REBALANCE_STATUS,
+ "Rebalance is %s. Time taken is %.2f secs", status, elapsed);
+ gf_msg(THIS->name, GF_LOG_INFO, 0, DHT_MSG_REBALANCE_STATUS,
+ "Files migrated: %" PRIu64 ", size: %" PRIu64 ", lookups: %" PRIu64
+ ", failures: %" PRIu64
+ ", skipped: "
+ "%" PRIu64,
+ files, size, lookup, failures, skipped);
+out:
+ return 0;
}
int
-gf_defrag_stop (gf_defrag_info_t *defrag, gf_defrag_status_t status,
- dict_t *output)
+gf_defrag_stop(dht_conf_t *conf, gf_defrag_status_t status, dict_t *output)
{
- /* TODO: set a variable 'stop_defrag' here, it should be checked
- in defrag loop */
- int ret = -1;
- GF_ASSERT (defrag);
+ /* TODO: set a variable 'stop_defrag' here, it should be checked
+ in defrag loop */
+ int ret = -1;
+ gf_defrag_info_t *defrag = conf->defrag;
- if (defrag->defrag_status == GF_DEFRAG_STATUS_NOT_STARTED) {
- goto out;
- }
+ GF_ASSERT(defrag);
- gf_msg ("", GF_LOG_INFO, 0, DHT_MSG_REBALANCE_STOPPED,
- "Received stop command on rebalance");
- defrag->defrag_status = status;
+ if (defrag->defrag_status == GF_DEFRAG_STATUS_NOT_STARTED) {
+ goto out;
+ }
- if (output)
- gf_defrag_status_get (defrag, output);
- ret = 0;
+ gf_msg("", GF_LOG_INFO, 0, DHT_MSG_REBALANCE_STOPPED,
+ "Received stop command on rebalance");
+ defrag->defrag_status = status;
+
+ if (output)
+ gf_defrag_status_get(conf, output);
+ ret = 0;
out:
- gf_msg_debug ("", 0, "Returning %d", ret);
- return ret;
+ gf_msg_debug("", 0, "Returning %d", ret);
+ return ret;
}
diff --git a/xlators/cluster/dht/src/dht-rename.c b/xlators/cluster/dht/src/dht-rename.c
index 0594945203e..d9dbf50492f 100644
--- a/xlators/cluster/dht/src/dht-rename.c
+++ b/xlators/cluster/dht/src/dht-rename.c
@@ -11,1349 +11,1987 @@
/* TODO: link(oldpath, newpath) fails if newpath already exists. DHT should
* delete the newpath if it gets EEXISTS from link() call.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "xlator.h"
#include "dht-common.h"
-#include "defaults.h"
+#include "dht-lock.h"
+#include <glusterfs/defaults.h>
+int
+dht_rename_unlock(call_frame_t *frame, xlator_t *this);
+int32_t
+dht_rename_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
int
-dht_rename_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
- struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent,
- dict_t *xdata)
+dht_rename_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
- char gfid[GF_UUID_BUF_SIZE] = {0};
+ dht_local_t *local = NULL;
- local = frame->local;
- prev = cookie;
+ local = frame->local;
+ dht_set_fixed_dir_stat(&local->preoldparent);
+ dht_set_fixed_dir_stat(&local->postoldparent);
+ dht_set_fixed_dir_stat(&local->preparent);
+ dht_set_fixed_dir_stat(&local->postparent);
- if (op_ret == -1) {
- /* TODO: undo the damage */
- gf_uuid_unparse(local->loc.inode->gfid, gfid);
+ if (IA_ISREG(local->stbuf.ia_type))
+ DHT_STRIP_PHASE1_FLAGS(&local->stbuf);
- gf_msg (this->name, GF_LOG_INFO, op_errno,
- DHT_MSG_RENAME_FAILED,
- "Rename %s -> %s on %s failed, (gfid = %s)",
- local->loc.path, local->loc2.path,
- prev->this->name, gfid);
+ DHT_STACK_UNWIND(rename, frame, local->op_ret, local->op_errno,
+ &local->stbuf, &local->preoldparent, &local->postoldparent,
+ &local->preparent, &local->postparent, local->xattr);
+ return 0;
+}
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- goto unwind;
- }
- /* TODO: construct proper stbuf for dir */
- /*
- * FIXME: is this the correct way to build stbuf and
- * parent bufs?
- */
- dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
- dht_iatt_merge (this, &local->preoldparent, preoldparent,
- prev->this);
- dht_iatt_merge (this, &local->postoldparent, postoldparent,
- prev->this);
- dht_iatt_merge (this, &local->preparent, prenewparent,
- prev->this);
- dht_iatt_merge (this, &local->postparent, postnewparent,
- prev->this);
+static void
+dht_rename_dir_unlock_src(call_frame_t *frame, xlator_t *this)
+{
+ dht_local_t *local = NULL;
+ local = frame->local;
+ dht_unlock_namespace(frame, &local->lock[0]);
+ return;
+}
-unwind:
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- WIPE (&local->preoldparent);
- WIPE (&local->postoldparent);
- WIPE (&local->preparent);
- WIPE (&local->postparent);
-
- DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
- DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
- &local->stbuf, &local->preoldparent,
- &local->postoldparent,
- &local->preparent, &local->postparent, xdata);
- }
+static void
+dht_rename_dir_unlock_dst(call_frame_t *frame, xlator_t *this)
+{
+ dht_local_t *local = NULL;
+ int op_ret = -1;
+ char src_gfid[GF_UUID_BUF_SIZE] = {0};
+ char dst_gfid[GF_UUID_BUF_SIZE] = {0};
+
+ local = frame->local;
+
+ /* Unlock entrylk */
+ dht_unlock_entrylk_wrapper(frame, &local->lock[1].ns.directory_ns);
+
+ /* Unlock inodelk */
+ op_ret = dht_unlock_inodelk(frame, local->lock[1].ns.parent_layout.locks,
+ local->lock[1].ns.parent_layout.lk_count,
+ dht_rename_unlock_cbk);
+ if (op_ret < 0) {
+ uuid_utoa_r(local->loc.inode->gfid, src_gfid);
+
+ if (local->loc2.inode)
+ uuid_utoa_r(local->loc2.inode->gfid, dst_gfid);
+
+ if (IA_ISREG(local->stbuf.ia_type))
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_UNLOCKING_FAILED,
+ "winding unlock inodelk failed "
+ "rename (%s:%s:%s %s:%s:%s), "
+ "stale locks left on bricks",
+ local->loc.path, src_gfid, local->src_cached->name,
+ local->loc2.path, dst_gfid,
+ local->dst_cached ? local->dst_cached->name : NULL);
+ else
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_UNLOCKING_FAILED,
+ "winding unlock inodelk failed "
+ "rename (%s:%s %s:%s), "
+ "stale locks left on bricks",
+ local->loc.path, src_gfid, local->loc2.path, dst_gfid);
- return 0;
-}
+ dht_rename_unlock_cbk(frame, NULL, this, 0, 0, NULL);
+ }
+ return;
+}
+static int
+dht_rename_dir_unlock(call_frame_t *frame, xlator_t *this)
+{
+ dht_rename_dir_unlock_src(frame, this);
+ dht_rename_dir_unlock_dst(frame, this);
+ return 0;
+}
int
-dht_rename_hashed_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
- struct iatt *preoldparent,
- struct iatt *postoldparent,
- struct iatt *prenewparent,
- struct iatt *postnewparent, dict_t *xdata)
+dht_rename_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
- dht_conf_t *conf = NULL;
- dht_local_t *local = NULL;
- int call_cnt = 0;
- call_frame_t *prev = NULL;
- int i = 0;
- char gfid[GF_UUID_BUF_SIZE] = {0};
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ xlator_t *prev = NULL;
+ int i = 0;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+ int subvol_cnt = -1;
+
+ conf = this->private;
+ local = frame->local;
+ prev = cookie;
+ subvol_cnt = dht_subvol_cnt(this, prev);
+ local->ret_cache[subvol_cnt] = op_ret;
+
+ if (op_ret == -1) {
+ gf_uuid_unparse(local->loc.inode->gfid, gfid);
- conf = this->private;
- local = frame->local;
- prev = cookie;
+ gf_msg(this->name, GF_LOG_INFO, op_errno, DHT_MSG_RENAME_FAILED,
+ "Rename %s -> %s on %s failed, (gfid = %s)", local->loc.path,
+ local->loc2.path, prev->name, gfid);
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ goto unwind;
+ }
+ /* TODO: construct proper stbuf for dir */
+ /*
+ * FIXME: is this the correct way to build stbuf and
+ * parent bufs?
+ */
+ dht_iatt_merge(this, &local->stbuf, stbuf);
+ dht_iatt_merge(this, &local->preoldparent, preoldparent);
+ dht_iatt_merge(this, &local->postoldparent, postoldparent);
+ dht_iatt_merge(this, &local->preparent, prenewparent);
+ dht_iatt_merge(this, &local->postparent, postnewparent);
+unwind:
+ this_call_cnt = dht_frame_return(frame);
+ if (is_last_call(this_call_cnt)) {
+ /* We get here with local->call_cnt == 0. Which means
+ * we are the only one executing this code, there is
+ * no contention. Therefore it's safe to manipulate or
+ * deref local->call_cnt directly (without locking).
+ */
+ if (local->ret_cache[conf->subvolume_cnt] == 0) {
+ /* count errant subvols in last field of ret_cache */
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (local->ret_cache[i] != 0)
+ ++local->ret_cache[conf->subvolume_cnt];
+ }
+ if (local->ret_cache[conf->subvolume_cnt]) {
+ /* undoing the damage:
+ * for all subvolumes, where rename
+ * succeeded, we perform the reverse operation
+ */
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (local->ret_cache[i] == 0)
+ ++local->call_cnt;
+ }
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (local->ret_cache[i])
+ continue;
- if (op_ret == -1) {
- /* TODO: undo the damage */
+ STACK_WIND(frame, dht_rename_dir_cbk, conf->subvolumes[i],
+ conf->subvolumes[i]->fops->rename, &local->loc2,
+ &local->loc, NULL);
+ }
- gf_uuid_unparse(local->loc.inode->gfid, gfid);
+ return 0;
+ }
+ }
- gf_msg (this->name, GF_LOG_INFO, op_errno,
- DHT_MSG_RENAME_FAILED,
- "rename %s -> %s on %s failed, (gfid = %s) ",
- local->loc.path, local->loc2.path,
- prev->this->name, gfid );
+ WIPE(&local->preoldparent);
+ WIPE(&local->postoldparent);
+ WIPE(&local->preparent);
+ WIPE(&local->postparent);
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- goto unwind;
- }
- /* TODO: construct proper stbuf for dir */
- /*
- * FIXME: is this the correct way to build stbuf and
- * parent bufs?
- */
- dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
- dht_iatt_merge (this, &local->preoldparent, preoldparent,
- prev->this);
- dht_iatt_merge (this, &local->postoldparent, postoldparent,
- prev->this);
- dht_iatt_merge (this, &local->preparent, prenewparent,
- prev->this);
- dht_iatt_merge (this, &local->postparent, postnewparent,
- prev->this);
-
- call_cnt = local->call_cnt = conf->subvolume_cnt - 1;
-
- if (!local->call_cnt)
- goto unwind;
-
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (conf->subvolumes[i] == local->dst_hashed)
- continue;
- STACK_WIND (frame, dht_rename_dir_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->rename,
- &local->loc, &local->loc2, NULL);
- if (!--call_cnt)
- break;
- }
+ dht_rename_dir_unlock(frame, this);
+ }
+ return 0;
+}
- return 0;
-unwind:
- WIPE (&local->preoldparent);
- WIPE (&local->postoldparent);
- WIPE (&local->preparent);
- WIPE (&local->postparent);
+int
+dht_rename_hashed_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
+{
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ int call_cnt = 0;
+ xlator_t *prev = NULL;
+ int i = 0;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ conf = this->private;
+ local = frame->local;
+ prev = cookie;
+
+ if (op_ret == -1) {
+ gf_uuid_unparse(local->loc.inode->gfid, gfid);
- DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
- DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
- &local->stbuf, &local->preoldparent,
- &local->postoldparent,
- &local->preparent, &local->postparent, NULL);
+ gf_msg(this->name, GF_LOG_INFO, op_errno, DHT_MSG_RENAME_FAILED,
+ "rename %s -> %s on %s failed, (gfid = %s) ", local->loc.path,
+ local->loc2.path, prev->name, gfid);
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ goto unwind;
+ }
+ /* TODO: construct proper stbuf for dir */
+ /*
+ * FIXME: is this the correct way to build stbuf and
+ * parent bufs?
+ */
+ dht_iatt_merge(this, &local->stbuf, stbuf);
+ dht_iatt_merge(this, &local->preoldparent, preoldparent);
+ dht_iatt_merge(this, &local->postoldparent, postoldparent);
+ dht_iatt_merge(this, &local->preparent, prenewparent);
+ dht_iatt_merge(this, &local->postparent, postnewparent);
+
+ call_cnt = local->call_cnt = conf->subvolume_cnt - 1;
+
+ if (!local->call_cnt)
+ goto unwind;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvolumes[i] == local->dst_hashed)
+ continue;
+ STACK_WIND_COOKIE(
+ frame, dht_rename_dir_cbk, conf->subvolumes[i], conf->subvolumes[i],
+ conf->subvolumes[i]->fops->rename, &local->loc, &local->loc2, NULL);
+ if (!--call_cnt)
+ break;
+ }
+
+ return 0;
+unwind:
+ WIPE(&local->preoldparent);
+ WIPE(&local->postoldparent);
+ WIPE(&local->preparent);
+ WIPE(&local->postparent);
- return 0;
+ dht_rename_dir_unlock(frame, this);
+ return 0;
}
-
int
-dht_rename_dir_do (call_frame_t *frame, xlator_t *this)
+dht_rename_dir_do(call_frame_t *frame, xlator_t *this)
{
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
- local = frame->local;
+ local = frame->local;
- if (local->op_ret == -1)
- goto err;
+ if (local->op_ret == -1)
+ goto err;
- local->op_ret = 0;
+ local->op_ret = 0;
- STACK_WIND (frame, dht_rename_hashed_dir_cbk,
- local->dst_hashed,
- local->dst_hashed->fops->rename,
- &local->loc, &local->loc2, NULL);
- return 0;
+ STACK_WIND_COOKIE(frame, dht_rename_hashed_dir_cbk, local->dst_hashed,
+ local->dst_hashed, local->dst_hashed->fops->rename,
+ &local->loc, &local->loc2, NULL);
+ return 0;
err:
- DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno, NULL, NULL,
- NULL, NULL, NULL, NULL);
- return 0;
+ dht_rename_dir_unlock(frame, this);
+ return 0;
}
-
int
-dht_rename_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, gf_dirent_t *entries,
- dict_t *xdata)
+dht_rename_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- int this_call_cnt = -1;
- call_frame_t *prev = NULL;
-
- local = frame->local;
- prev = cookie;
-
- if (op_ret > 2) {
- gf_msg_trace (this->name, 0,
- "readdir on %s for %s returned %d entries",
- prev->this->name, local->loc.path, op_ret);
- local->op_ret = -1;
- local->op_errno = ENOTEMPTY;
- }
+ dht_local_t *local = NULL;
+ int this_call_cnt = -1;
+ xlator_t *prev = NULL;
- this_call_cnt = dht_frame_return (frame);
+ local = frame->local;
+ prev = cookie;
- if (is_last_call (this_call_cnt)) {
- dht_rename_dir_do (frame, this);
- }
+ if (op_ret > 2) {
+ gf_msg_trace(this->name, 0, "readdir on %s for %s returned %d entries",
+ prev->name, local->loc.path, op_ret);
+ local->op_ret = -1;
+ local->op_errno = ENOTEMPTY;
+ }
- return 0;
-}
+ this_call_cnt = dht_frame_return(frame);
+
+ if (is_last_call(this_call_cnt)) {
+ dht_rename_dir_do(frame, this);
+ }
+ return 0;
+}
int
-dht_rename_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, fd_t *fd, dict_t *xdata)
+dht_rename_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, fd_t *fd, dict_t *xdata)
{
- dht_local_t *local = NULL;
- int this_call_cnt = -1;
- call_frame_t *prev = NULL;
- char gfid[GF_UUID_BUF_SIZE] = {0};
+ dht_local_t *local = NULL;
+ int this_call_cnt = -1;
+ xlator_t *prev = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
- local = frame->local;
- prev = cookie;
+ local = frame->local;
+ prev = cookie;
+ if (op_ret == -1) {
+ gf_uuid_unparse(local->loc.inode->gfid, gfid);
+ gf_msg(this->name, GF_LOG_INFO, op_errno, DHT_MSG_OPENDIR_FAILED,
+ "opendir on %s for %s failed,(gfid = %s) ", prev->name,
+ local->loc.path, gfid);
+ goto err;
+ }
- if (op_ret == -1) {
+ fd_bind(fd);
+ STACK_WIND_COOKIE(frame, dht_rename_readdir_cbk, prev, prev,
+ prev->fops->readdir, local->fd, 4096, 0, NULL);
- gf_uuid_unparse(local->loc.inode->gfid, gfid);
- gf_msg (this->name, GF_LOG_INFO, op_errno,
- DHT_MSG_OPENDIR_FAILED,
- "opendir on %s for %s failed,(gfid = %s) ",
- prev->this->name, local->loc.path, gfid);
- goto err;
- }
+ return 0;
- STACK_WIND (frame, dht_rename_readdir_cbk,
- prev->this, prev->this->fops->readdir,
- local->fd, 4096, 0, NULL);
+err:
+ this_call_cnt = dht_frame_return(frame);
+ if (is_last_call(this_call_cnt)) {
+ dht_rename_dir_do(frame, this);
+ }
+
+ return 0;
+}
+
+int
+dht_rename_dir_lock2_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ char src_gfid[GF_UUID_BUF_SIZE] = {0};
+ char dst_gfid[GF_UUID_BUF_SIZE] = {0};
+ dht_conf_t *conf = NULL;
+ int i = 0;
+
+ local = frame->local;
+ conf = this->private;
+
+ if (op_ret < 0) {
+ uuid_utoa_r(local->loc.inode->gfid, src_gfid);
+
+ if (local->loc2.inode)
+ uuid_utoa_r(local->loc2.inode->gfid, dst_gfid);
+
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_INODE_LK_ERROR,
+ "acquiring entrylk after inodelk failed"
+ "rename (%s:%s:%s %s:%s:%s)",
+ local->loc.path, src_gfid, local->src_cached->name,
+ local->loc2.path, dst_gfid,
+ local->dst_cached ? local->dst_cached->name : NULL);
+
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ goto err;
+ }
+
+ local->fd = fd_create(local->loc.inode, frame->root->pid);
+ if (!local->fd) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->op_ret = 0;
+
+ if (!local->dst_cached) {
+ dht_rename_dir_do(frame, this);
return 0;
+ }
-err:
- this_call_cnt = dht_frame_return (frame);
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ STACK_WIND_COOKIE(frame, dht_rename_opendir_cbk, conf->subvolumes[i],
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->opendir, &local->loc2,
+ local->fd, NULL);
+ }
- if (is_last_call (this_call_cnt)) {
- dht_rename_dir_do (frame, this);
- }
+ return 0;
- return 0;
+err:
+ /* No harm in calling an extra unlock */
+ dht_rename_dir_unlock(frame, this);
+ return 0;
}
-
int
-dht_rename_dir (call_frame_t *frame, xlator_t *this)
+dht_rename_dir_lock1_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- dht_conf_t *conf = NULL;
- dht_local_t *local = NULL;
- int i = 0;
- int op_errno = -1;
+ dht_local_t *local = NULL;
+ char src_gfid[GF_UUID_BUF_SIZE] = {0};
+ char dst_gfid[GF_UUID_BUF_SIZE] = {0};
+ int ret = 0;
+ loc_t *loc = NULL;
+ xlator_t *subvol = NULL;
+
+ local = frame->local;
+
+ if (op_ret < 0) {
+ uuid_utoa_r(local->loc.inode->gfid, src_gfid);
+
+ if (local->loc2.inode)
+ uuid_utoa_r(local->loc2.inode->gfid, dst_gfid);
+
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_INODE_LK_ERROR,
+ "acquiring entrylk after inodelk failed"
+ "rename (%s:%s:%s %s:%s:%s)",
+ local->loc.path, src_gfid, local->src_cached->name,
+ local->loc2.path, dst_gfid,
+ local->dst_cached ? local->dst_cached->name : NULL);
+
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ goto err;
+ }
+
+ if (local->current == &local->lock[0]) {
+ loc = &local->loc2;
+ subvol = local->dst_hashed;
+ local->current = &local->lock[1];
+ } else {
+ loc = &local->loc;
+ subvol = local->src_hashed;
+ local->current = &local->lock[0];
+ }
+ ret = dht_protect_namespace(frame, loc, subvol, &local->current->ns,
+ dht_rename_dir_lock2_cbk);
+ if (ret < 0) {
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ return 0;
+err:
+ /* No harm in calling an extra unlock */
+ dht_rename_dir_unlock(frame, this);
+ return 0;
+}
+/*
+ * If the hashed subvolumes of both source and dst are the different,
+ * lock in dictionary order of hashed subvol->name. This is important
+ * in case the parent directory is the same for both src and dst to
+ * prevent inodelk deadlocks when racing with a fix-layout op on the parent.
+ *
+ * If the hashed subvols are the same, use the gfid/name to determine
+ * the order of taking locks to prevent entrylk deadlocks when the parent
+ * dirs are the same.
+ *
+ */
+static int
+dht_order_rename_lock(call_frame_t *frame, loc_t **loc, xlator_t **subvol)
+{
+ int ret = 0;
+ int op_ret = 0;
+ dht_local_t *local = NULL;
+ char *src = NULL;
+ char *dst = NULL;
+
+ local = frame->local;
+
+ if (local->src_hashed->name == local->dst_hashed->name) {
+ ret = 0;
+ } else {
+ ret = strcmp(local->src_hashed->name, local->dst_hashed->name);
+ }
+
+ if (ret == 0) {
+ /* hashed subvols are the same for src and dst */
+ /* Entrylks need to be ordered*/
+
+ src = alloca(GF_UUID_BNAME_BUF_SIZE + strlen(local->loc.name) + 1);
+ if (!src) {
+ gf_msg(frame->this->name, GF_LOG_ERROR, ENOMEM, 0,
+ "Insufficient memory for src");
+ op_ret = -1;
+ goto out;
+ }
- conf = frame->this->private;
- local = frame->local;
+ if (!gf_uuid_is_null(local->loc.pargfid))
+ uuid_utoa_r(local->loc.pargfid, src);
+ else if (local->loc.parent)
+ uuid_utoa_r(local->loc.parent->gfid, src);
+ else
+ src[0] = '\0';
- local->call_cnt = conf->subvolume_cnt;
+ strcat(src, local->loc.name);
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (!conf->subvolume_status[i]) {
- gf_msg (this->name, GF_LOG_INFO, 0,
- DHT_MSG_RENAME_FAILED,
- "Rename dir failed: subvolume down (%s)",
- conf->subvolumes[i]->name);
- op_errno = ENOTCONN;
- goto err;
- }
+ dst = alloca(GF_UUID_BNAME_BUF_SIZE + strlen(local->loc2.name) + 1);
+ if (!dst) {
+ gf_msg(frame->this->name, GF_LOG_ERROR, ENOMEM, 0,
+ "Insufficient memory for dst");
+ op_ret = -1;
+ goto out;
}
- local->fd = fd_create (local->loc.inode, frame->root->pid);
- if (!local->fd) {
- op_errno = ENOMEM;
- goto err;
- }
+ if (!gf_uuid_is_null(local->loc2.pargfid))
+ uuid_utoa_r(local->loc2.pargfid, dst);
+ else if (local->loc2.parent)
+ uuid_utoa_r(local->loc2.parent->gfid, dst);
+ else
+ dst[0] = '\0';
- local->op_ret = 0;
+ strcat(dst, local->loc2.name);
+ ret = strcmp(src, dst);
+ }
- if (!local->dst_cached) {
- dht_rename_dir_do (frame, this);
- return 0;
- }
+ if (ret <= 0) {
+ /*inodelk in dictionary order of hashed subvol names*/
+ /*entrylk in dictionary order of gfid/basename */
+ local->current = &local->lock[0];
+ *loc = &local->loc;
+ *subvol = local->src_hashed;
- for (i = 0; i < conf->subvolume_cnt; i++) {
- STACK_WIND (frame, dht_rename_opendir_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->opendir,
- &local->loc2, local->fd, NULL);
- }
+ } else {
+ local->current = &local->lock[1];
+ *loc = &local->loc2;
+ *subvol = local->dst_hashed;
+ }
- return 0;
+ op_ret = 0;
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL, NULL, NULL,
- NULL, NULL);
- return 0;
+out:
+ return op_ret;
}
-#define DHT_MARK_FOP_INTERNAL(xattr) do { \
- int tmp = -1; \
- if (!xattr) { \
- xattr = dict_new (); \
- if (!xattr) \
- break; \
- } \
- tmp = dict_set_str (xattr, GLUSTERFS_INTERNAL_FOP_KEY, "yes"); \
- if (tmp) { \
- gf_msg (this->name, GF_LOG_ERROR, 0, \
- DHT_MSG_DICT_SET_FAILED, \
- "Failed to set dictionary value: key = %s," \
- " path = %s", GLUSTERFS_INTERNAL_FOP_KEY, \
- local->loc.path); \
- } \
- }while (0)
-
-#define DHT_MARKER_DONT_ACCOUNT(xattr) do { \
- int tmp = -1; \
- if (!xattr) { \
- xattr = dict_new (); \
- if (!xattr) \
- break; \
- } \
- tmp = dict_set_str (xattr, GLUSTERFS_MARKER_DONT_ACCOUNT_KEY, \
- "yes"); \
- if (tmp) { \
- gf_msg (this->name, GF_LOG_ERROR, 0, \
- DHT_MSG_DICT_SET_FAILED, \
- "Failed to set dictionary value: key = %s," \
- " path = %s",GLUSTERFS_MARKER_DONT_ACCOUNT_KEY, \
- local->loc.path); \
- } \
- }while (0)
int
-dht_rename_unlock_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *xdata)
+dht_rename_dir(call_frame_t *frame, xlator_t *this)
{
- dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ loc_t *loc = NULL;
+ xlator_t *subvol = NULL;
+ int i = 0;
+ int ret = 0;
+ int op_errno = -1;
+
+ conf = frame->this->private;
+ local = frame->local;
+
+ local->ret_cache = GF_CALLOC(conf->subvolume_cnt + 1, sizeof(int),
+ gf_dht_ret_cache_t);
+
+ if (local->ret_cache == NULL) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->call_cnt = conf->subvolume_cnt;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (!conf->subvolume_status[i]) {
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_RENAME_FAILED,
+ "Rename dir failed: subvolume down (%s)",
+ conf->subvolumes[i]->name);
+ op_errno = ENOTCONN;
+ goto err;
+ }
+ }
+
+ /* Locks on src and dst needs to ordered which otherwise might cause
+ * deadlocks when rename (src, dst) and rename (dst, src) is done from
+ * two different clients
+ */
+ ret = dht_order_rename_lock(frame, &loc, &subvol);
+ if (ret) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ /* Rename must take locks on src to avoid lookup selfheal from
+ * recreating src on those subvols where the rename was successful.
+ * The locks can't be issued parallel as two different clients might
+ * attempt same rename command and be in dead lock.
+ */
+ ret = dht_protect_namespace(frame, loc, subvol, &local->current->ns,
+ dht_rename_dir_lock1_cbk);
+ if (ret < 0) {
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ return 0;
- local = frame->local;
+err:
+ DHT_STACK_UNWIND(rename, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
+}
- DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
- DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
- &local->stbuf, &local->preoldparent,
- &local->postoldparent, &local->preparent,
- &local->postparent, NULL);
- return 0;
+static int
+dht_rename_track_for_changelog(xlator_t *this, dict_t *xattr, loc_t *oldloc,
+ loc_t *newloc)
+{
+ int ret = -1;
+ dht_changelog_rename_info_t *info = NULL;
+ char *name = NULL;
+ int len1 = 0;
+ int len2 = 0;
+ int size = 0;
+
+ if (!xattr || !oldloc || !newloc || !this)
+ return ret;
+
+ len1 = strlen(oldloc->name) + 1;
+ len2 = strlen(newloc->name) + 1;
+ size = sizeof(dht_changelog_rename_info_t) + len1 + len2;
+
+ info = GF_CALLOC(size, sizeof(char), gf_common_mt_char);
+ if (!info) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "Failed to calloc memory");
+ return ret;
+ }
+
+ gf_uuid_copy(info->old_pargfid, oldloc->pargfid);
+ gf_uuid_copy(info->new_pargfid, newloc->pargfid);
+
+ info->oldname_len = len1;
+ info->newname_len = len2;
+ strncpy(info->buffer, oldloc->name, len1);
+ name = info->buffer + len1;
+ strncpy(name, newloc->name, len2);
+
+ ret = dict_set_bin(xattr, DHT_CHANGELOG_RENAME_OP_KEY, info, size);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value: key = %s,"
+ " path = %s",
+ DHT_CHANGELOG_RENAME_OP_KEY, oldloc->name);
+ GF_FREE(info);
+ }
+
+ return ret;
}
+#define DHT_MARKER_DONT_ACCOUNT(xattr) \
+ do { \
+ int tmp = -1; \
+ if (!xattr) { \
+ xattr = dict_new(); \
+ if (!xattr) \
+ break; \
+ } \
+ tmp = dict_set_str(xattr, GLUSTERFS_MARKER_DONT_ACCOUNT_KEY, "yes"); \
+ if (tmp) { \
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, \
+ "Failed to set dictionary value: key = %s," \
+ " path = %s", \
+ GLUSTERFS_MARKER_DONT_ACCOUNT_KEY, local->loc.path); \
+ } \
+ } while (0)
+
+#define DHT_CHANGELOG_TRACK_AS_RENAME(xattr, oldloc, newloc) \
+ do { \
+ int tmp = -1; \
+ if (!xattr) { \
+ xattr = dict_new(); \
+ if (!xattr) { \
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, \
+ "Failed to create dictionary to " \
+ "track rename"); \
+ break; \
+ } \
+ } \
+ \
+ tmp = dht_rename_track_for_changelog(this, xattr, oldloc, newloc); \
+ \
+ if (tmp) { \
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, \
+ "Failed to set dictionary value: key = %s," \
+ " path = %s", \
+ DHT_CHANGELOG_RENAME_OP_KEY, (oldloc)->path); \
+ } \
+ } while (0)
+
int
-dht_rename_unlock (call_frame_t *frame, xlator_t *this)
+dht_rename_unlock(call_frame_t *frame, xlator_t *this)
{
- dht_local_t *local = NULL;
- int op_ret = -1;
- char src_gfid[GF_UUID_BUF_SIZE] = {0};
- char dst_gfid[GF_UUID_BUF_SIZE] = {0};
-
- local = frame->local;
-
- op_ret = dht_unlock_inodelk (frame, local->lock.locks,
- local->lock.lk_count,
- dht_rename_unlock_cbk);
- if (op_ret < 0) {
- uuid_utoa_r (local->loc.inode->gfid, src_gfid);
-
- if (local->loc2.inode)
- uuid_utoa_r (local->loc2.inode->gfid, dst_gfid);
-
- gf_log (this->name, GF_LOG_WARNING,
- "winding unlock inodelk failed "
- "rename (%s:%s:%s %s:%s:%s), "
- "stale locks left on bricks",
- local->loc.path, src_gfid, local->src_cached->name,
- local->loc2.path, dst_gfid,
- local->dst_cached ? local->dst_cached->name : NULL);
-
- dht_rename_unlock_cbk (frame, NULL, this, 0, 0, NULL);
- }
-
- return 0;
+ dht_local_t *local = NULL;
+ int op_ret = -1;
+ char src_gfid[GF_UUID_BUF_SIZE] = {0};
+ char dst_gfid[GF_UUID_BUF_SIZE] = {0};
+ dht_ilock_wrap_t inodelk_wrapper = {
+ 0,
+ };
+
+ local = frame->local;
+ inodelk_wrapper.locks = local->rename_inodelk_backward_compatible;
+ inodelk_wrapper.lk_count = local->rename_inodelk_bc_count;
+
+ op_ret = dht_unlock_inodelk_wrapper(frame, &inodelk_wrapper);
+ if (op_ret < 0) {
+ uuid_utoa_r(local->loc.inode->gfid, src_gfid);
+
+ if (local->loc2.inode)
+ uuid_utoa_r(local->loc2.inode->gfid, dst_gfid);
+
+ if (IA_ISREG(local->stbuf.ia_type))
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_UNLOCKING_FAILED,
+ "winding unlock inodelk failed "
+ "rename (%s:%s:%s %s:%s:%s), "
+ "stale locks left on bricks",
+ local->loc.path, src_gfid, local->src_cached->name,
+ local->loc2.path, dst_gfid,
+ local->dst_cached ? local->dst_cached->name : NULL);
+ else
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_UNLOCKING_FAILED,
+ "winding unlock inodelk failed "
+ "rename (%s:%s %s:%s), "
+ "stale locks left on bricks",
+ local->loc.path, src_gfid, local->loc2.path, dst_gfid);
+ }
+
+ dht_unlock_namespace(frame, &local->lock[0]);
+ dht_unlock_namespace(frame, &local->lock[1]);
+
+ dht_rename_unlock_cbk(frame, NULL, this, local->op_ret, local->op_errno,
+ NULL);
+ return 0;
}
int
-dht_rename_done (call_frame_t *frame, xlator_t *this)
+dht_rename_done(call_frame_t *frame, xlator_t *this)
{
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
- local = frame->local;
+ local = frame->local;
- if (local->linked == _gf_true) {
- local->linked = _gf_false;
- dht_linkfile_attr_heal (frame, this);
- }
+ if (local->linked == _gf_true) {
+ local->linked = _gf_false;
+ dht_linkfile_attr_heal(frame, this);
+ }
- dht_rename_unlock (frame, this);
- return 0;
+ dht_rename_unlock(frame, this);
+ return 0;
}
int
-dht_rename_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+dht_rename_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- int this_call_cnt = 0;
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
+ int this_call_cnt = 0;
- local = frame->local;
- prev = cookie;
+ local = frame->local;
+ prev = cookie;
- if (!local) {
- gf_log (this->name, GF_LOG_ERROR,
- "!local, should not happen");
- goto out;
- }
+ FRAME_SU_UNDO(frame, dht_local_t);
+ if (!local) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INVALID_VALUE,
+ "!local, should not happen");
+ goto out;
+ }
- this_call_cnt = dht_frame_return (frame);
+ this_call_cnt = dht_frame_return(frame);
- if (op_ret == -1) {
- gf_msg (this->name, GF_LOG_WARNING, op_errno,
- DHT_MSG_UNLINK_FAILED,
- "%s: Rename: unlink on %s failed ",
- local->loc.path, prev->this->name);
- }
+ if (op_ret == -1) {
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_UNLINK_FAILED,
+ "%s: Rename: unlink on %s failed ", local->loc.path, prev->name);
+ }
- WIPE (&local->preoldparent);
- WIPE (&local->postoldparent);
- WIPE (&local->preparent);
- WIPE (&local->postparent);
+ WIPE(&local->preoldparent);
+ WIPE(&local->postoldparent);
+ WIPE(&local->preparent);
+ WIPE(&local->postparent);
- if (is_last_call (this_call_cnt)) {
- dht_rename_done (frame, this);
- }
+ if (is_last_call(this_call_cnt)) {
+ dht_rename_done(frame, this);
+ }
out:
- return 0;
+ return 0;
}
-
int
-dht_rename_cleanup (call_frame_t *frame)
+dht_rename_cleanup(call_frame_t *frame)
{
- dht_local_t *local = NULL;
- xlator_t *this = NULL;
- xlator_t *src_hashed = NULL;
- xlator_t *src_cached = NULL;
- xlator_t *dst_hashed = NULL;
- xlator_t *dst_cached = NULL;
- int call_cnt = 0;
- dict_t *xattr = NULL;
- char gfid[GF_UUID_BUF_SIZE] = {0};
-
- local = frame->local;
- this = frame->this;
-
- src_hashed = local->src_hashed;
- src_cached = local->src_cached;
- dst_hashed = local->dst_hashed;
- dst_cached = local->dst_cached;
-
- if (src_cached == dst_cached)
- goto nolinks;
-
- if (local->linked && (dst_hashed != src_hashed )&&
- (dst_hashed != src_cached)) {
- call_cnt++;
- }
+ dht_local_t *local = NULL;
+ xlator_t *this = NULL;
+ xlator_t *src_hashed = NULL;
+ xlator_t *src_cached = NULL;
+ xlator_t *dst_hashed = NULL;
+ xlator_t *dst_cached = NULL;
+ int call_cnt = 0;
+ dict_t *xattr = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
- if (local->added_link && (src_cached != dst_hashed)) {
- call_cnt++;
- }
+ local = frame->local;
+ this = frame->this;
- local->call_cnt = call_cnt;
+ src_hashed = local->src_hashed;
+ src_cached = local->src_cached;
+ dst_hashed = local->dst_hashed;
+ dst_cached = local->dst_cached;
- if (!call_cnt)
- goto nolinks;
+ if (src_cached == dst_cached)
+ goto nolinks;
- DHT_MARK_FOP_INTERNAL (xattr);
+ if (local->linked && (dst_hashed != src_hashed) &&
+ (dst_hashed != src_cached)) {
+ call_cnt++;
+ }
- gf_uuid_unparse(local->loc.inode->gfid, gfid);
+ if (local->added_link && (src_cached != dst_hashed)) {
+ call_cnt++;
+ }
- if (local->linked && (dst_hashed != src_hashed) &&
- (dst_hashed != src_cached)) {
- dict_t *xattr_new = NULL;
+ local->call_cnt = call_cnt;
- gf_msg_trace (this->name, 0,
- "unlinking linkfile %s @ %s => %s, (gfid = %s)",
- local->loc.path, dst_hashed->name,
- src_cached->name, gfid);
+ if (!call_cnt)
+ goto nolinks;
- xattr_new = dict_copy_with_ref (xattr, NULL);
+ DHT_MARK_FOP_INTERNAL(xattr);
+ gf_uuid_unparse(local->loc.inode->gfid, gfid);
- DHT_MARKER_DONT_ACCOUNT(xattr_new);
+ if (local->linked && (dst_hashed != src_hashed) &&
+ (dst_hashed != src_cached)) {
+ dict_t *xattr_new = NULL;
- STACK_WIND (frame, dht_rename_unlink_cbk,
- dst_hashed, dst_hashed->fops->unlink,
- &local->loc, 0, xattr_new);
+ gf_msg_trace(this->name, 0,
+ "unlinking linkfile %s @ %s => %s, (gfid = %s)",
+ local->loc.path, dst_hashed->name, src_cached->name, gfid);
- dict_unref (xattr_new);
- xattr_new = NULL;
- }
+ xattr_new = dict_copy_with_ref(xattr, NULL);
- if (local->added_link && (src_cached != dst_hashed)) {
- dict_t *xattr_new = NULL;
+ DHT_MARKER_DONT_ACCOUNT(xattr_new);
- gf_msg_trace (this->name, 0,
- "unlinking link %s => %s (%s), (gfid = %s)",
- local->loc.path, local->loc2.path,
- src_cached->name, gfid);
+ FRAME_SU_DO(frame, dht_local_t);
+ STACK_WIND_COOKIE(frame, dht_rename_unlink_cbk, dst_hashed, dst_hashed,
+ dst_hashed->fops->unlink, &local->loc, 0, xattr_new);
- xattr_new = dict_copy_with_ref (xattr, NULL);
+ dict_unref(xattr_new);
+ xattr_new = NULL;
+ }
- if (gf_uuid_compare (local->loc.pargfid,
- local->loc2.pargfid) == 0) {
- DHT_MARKER_DONT_ACCOUNT(xattr_new);
- }
+ if (local->added_link && (src_cached != dst_hashed)) {
+ dict_t *xattr_new = NULL;
+
+ gf_msg_trace(this->name, 0, "unlinking link %s => %s (%s), (gfid = %s)",
+ local->loc.path, local->loc2.path, src_cached->name, gfid);
- STACK_WIND (frame, dht_rename_unlink_cbk,
- src_cached, src_cached->fops->unlink,
- &local->loc2, 0, xattr_new);
+ xattr_new = dict_copy_with_ref(xattr, NULL);
- dict_unref (xattr_new);
- xattr_new = NULL;
+ if (gf_uuid_compare(local->loc.pargfid, local->loc2.pargfid) == 0) {
+ DHT_MARKER_DONT_ACCOUNT(xattr_new);
}
+ /* *
+ * The link to file is created using root permission.
+ * Hence deletion should happen using root. Otherwise
+ * it will fail.
+ */
+ FRAME_SU_DO(frame, dht_local_t);
+ STACK_WIND_COOKIE(frame, dht_rename_unlink_cbk, src_cached, src_cached,
+ src_cached->fops->unlink, &local->loc2, 0, xattr_new);
- if (xattr)
- dict_unref (xattr);
+ dict_unref(xattr_new);
+ xattr_new = NULL;
+ }
- return 0;
+ if (xattr)
+ dict_unref(xattr);
+
+ return 0;
nolinks:
- WIPE (&local->preoldparent);
- WIPE (&local->postoldparent);
- WIPE (&local->preparent);
- WIPE (&local->postparent);
+ WIPE(&local->preoldparent);
+ WIPE(&local->postoldparent);
+ WIPE(&local->preparent);
+ WIPE(&local->postparent);
- dht_rename_unlock (frame, this);
- return 0;
+ dht_rename_unlock(frame, this);
+ return 0;
}
-
int
-dht_rename_links_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
+dht_rename_unlink(call_frame_t *frame, xlator_t *this)
{
- call_frame_t *prev = NULL;
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *src_hashed = NULL;
+ xlator_t *src_cached = NULL;
+ xlator_t *dst_hashed = NULL;
+ xlator_t *dst_cached = NULL;
+ xlator_t *rename_subvol = NULL;
+ dict_t *xattr = NULL;
- prev = cookie;
- local = frame->local;
+ local = frame->local;
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "link/file %s on %s failed (%s)",
- local->loc.path, prev->this->name, strerror (op_errno));
- }
+ src_hashed = local->src_hashed;
+ src_cached = local->src_cached;
+ dst_hashed = local->dst_hashed;
+ dst_cached = local->dst_cached;
- if (local->linked == _gf_true) {
- local->linked = _gf_false;
- dht_linkfile_attr_heal (frame, this);
- }
- DHT_STACK_DESTROY (frame);
+ local->call_cnt = 0;
- return 0;
-}
+ /* NOTE: rename_subvol is the same subvolume from which dht_rename_cbk
+ * is called. since rename has already happened on rename_subvol,
+ * unlink shouldn't be sent for oldpath (either linkfile or cached-file)
+ * on rename_subvol. */
+ if (src_cached == dst_cached)
+ rename_subvol = src_cached;
+ else
+ rename_subvol = dst_hashed;
+ /* TODO: delete files in background */
-int
-dht_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
- struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent,
- dict_t *xdata)
-{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- xlator_t *src_hashed = NULL;
- xlator_t *src_cached = NULL;
- xlator_t *dst_hashed = NULL;
- xlator_t *dst_cached = NULL;
- xlator_t *rename_subvol = NULL;
- call_frame_t *link_frame = NULL;
- dht_local_t *link_local = NULL;
- dict_t *xattr = NULL;
-
- local = frame->local;
- prev = cookie;
-
- src_hashed = local->src_hashed;
- src_cached = local->src_cached;
- dst_hashed = local->dst_hashed;
- dst_cached = local->dst_cached;
-
- if (local->linked == _gf_true)
- FRAME_SU_UNDO (frame, dht_local_t);
-
- /* It is a critical failure iff we fail to rename the cached file
- * if the rename of the linkto failed, it is not a critical failure,
- * and we do not want to lose the created hard link for the new
- * name as that could have been read by other clients.
- *
- * NOTE: If another client is attempting the same oldname -> newname
- * rename, and finds both file names as existing, and are hard links
- * to each other, then FUSE would send in an unlink for oldname. In
- * this time duration if we treat the linkto as a critical error and
- * unlink the newname we created, we would have effectively lost the
- * file to rename operations.
- *
- * Repercussions of treating this as a non-critical error is that
- * we could leave behind a stale linkto file and/or not create the new
- * linkto file, the second case would be rectified by a subsequent
- * lookup, the first case by a rebalance, like for all stale linkto
- * files */
-
- if (op_ret == -1) {
- /* Critical failure: unable to rename the cached file */
- if (prev->this == src_cached) {
- gf_msg (this->name, GF_LOG_WARNING, op_errno,
- DHT_MSG_RENAME_FAILED,
- "%s: Rename on %s failed, (gfid = %s) ",
- local->loc.path, prev->this->name,
- local->loc.inode ?
- uuid_utoa(local->loc.inode->gfid):"");
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- goto cleanup;
- } else {
- /* Non-critical failure, unable to rename the linkto
- * file
- */
- gf_msg (this->name, GF_LOG_INFO, op_errno,
- DHT_MSG_RENAME_FAILED,
- "%s: Rename (linkto file) on %s failed, "
- "(gfid = %s) ",
- local->loc.path, prev->this->name,
- local->loc.inode ?
- uuid_utoa(local->loc.inode->gfid):"");
- }
- }
+ if (src_cached != dst_hashed && src_cached != dst_cached)
+ local->call_cnt++;
- if ((src_cached == dst_cached) && (dst_hashed != dst_cached)) {
- link_frame = copy_frame (frame);
- if (!link_frame) {
- goto err;
- }
+ if (src_hashed != rename_subvol && src_hashed != src_cached)
+ local->call_cnt++;
- /* fop value sent as maxvalue because it is not used
- anywhere in this case */
- link_local = dht_local_init (link_frame, &local->loc2, NULL,
- GF_FOP_MAXVALUE);
- if (!link_local) {
- goto err;
- }
+ if (dst_cached && dst_cached != dst_hashed && dst_cached != src_cached)
+ local->call_cnt++;
- if (link_local->loc.inode)
- inode_unref (link_local->loc.inode);
- link_local->loc.inode = inode_ref (local->loc.inode);
- gf_uuid_copy (link_local->gfid, local->loc.inode->gfid);
+ if (local->call_cnt == 0)
+ goto unwind;
- dht_linkfile_create (link_frame, dht_rename_links_create_cbk,
- this, src_cached, dst_hashed,
- &link_local->loc);
- }
+ DHT_MARK_FOP_INTERNAL(xattr);
-err:
- /* Merge attrs only from src_cached. In case there of src_cached !=
- * dst_hashed, this ignores linkfile attrs. */
- if (prev->this == src_cached) {
- dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
- dht_iatt_merge (this, &local->preoldparent, preoldparent,
- prev->this);
- dht_iatt_merge (this, &local->postoldparent, postoldparent,
- prev->this);
- dht_iatt_merge (this, &local->preparent, prenewparent,
- prev->this);
- dht_iatt_merge (this, &local->postparent, postnewparent,
- prev->this);
- }
+ if (src_cached != dst_hashed && src_cached != dst_cached) {
+ dict_t *xattr_new = NULL;
+ xattr_new = dict_copy_with_ref(xattr, NULL);
- /* NOTE: rename_subvol is the same subvolume from which dht_rename_cbk
- * is called. since rename has already happened on rename_subvol,
- * unlink should not be sent for oldpath (either linkfile or cached-file)
- * on rename_subvol. */
- if (src_cached == dst_cached)
- rename_subvol = src_cached;
- else
- rename_subvol = dst_hashed;
+ gf_msg_trace(this->name, 0, "deleting old src datafile %s @ %s",
+ local->loc.path, src_cached->name);
- /* TODO: delete files in background */
+ if (gf_uuid_compare(local->loc.pargfid, local->loc2.pargfid) == 0) {
+ DHT_MARKER_DONT_ACCOUNT(xattr_new);
+ }
- if (src_cached != dst_hashed && src_cached != dst_cached)
- local->call_cnt++;
+ DHT_CHANGELOG_TRACK_AS_RENAME(xattr_new, &local->loc, &local->loc2);
+ STACK_WIND_COOKIE(frame, dht_rename_unlink_cbk, src_cached, src_cached,
+ src_cached->fops->unlink, &local->loc, 0, xattr_new);
- if (src_hashed != rename_subvol && src_hashed != src_cached)
- local->call_cnt++;
+ dict_unref(xattr_new);
+ xattr_new = NULL;
+ }
- if (dst_cached && dst_cached != dst_hashed && dst_cached != src_cached)
- local->call_cnt++;
+ if (src_hashed != rename_subvol && src_hashed != src_cached) {
+ dict_t *xattr_new = NULL;
- if (local->call_cnt == 0)
- goto unwind;
+ xattr_new = dict_copy_with_ref(xattr, NULL);
- DHT_MARK_FOP_INTERNAL (xattr);
+ gf_msg_trace(this->name, 0, "deleting old src linkfile %s @ %s",
+ local->loc.path, src_hashed->name);
- if (src_cached != dst_hashed && src_cached != dst_cached) {
- dict_t *xattr_new = NULL;
+ DHT_MARKER_DONT_ACCOUNT(xattr_new);
- xattr_new = dict_copy_with_ref (xattr, NULL);
+ STACK_WIND_COOKIE(frame, dht_rename_unlink_cbk, src_hashed, src_hashed,
+ src_hashed->fops->unlink, &local->loc, 0, xattr_new);
- gf_msg_trace (this->name, 0,
- "deleting old src datafile %s @ %s",
- local->loc.path, src_cached->name);
+ dict_unref(xattr_new);
+ xattr_new = NULL;
+ }
- if (gf_uuid_compare (local->loc.pargfid,
- local->loc2.pargfid) == 0) {
- DHT_MARKER_DONT_ACCOUNT(xattr_new);
- }
+ if (dst_cached && (dst_cached != dst_hashed) &&
+ (dst_cached != src_cached)) {
+ gf_msg_trace(this->name, 0, "deleting old dst datafile %s @ %s",
+ local->loc2.path, dst_cached->name);
- STACK_WIND (frame, dht_rename_unlink_cbk,
- src_cached, src_cached->fops->unlink,
- &local->loc, 0, xattr_new);
+ STACK_WIND_COOKIE(frame, dht_rename_unlink_cbk, dst_cached, dst_cached,
+ dst_cached->fops->unlink, &local->loc2, 0, xattr);
+ }
+ if (xattr)
+ dict_unref(xattr);
+ return 0;
- dict_unref (xattr_new);
- xattr_new = NULL;
- }
-
- if (src_hashed != rename_subvol && src_hashed != src_cached) {
- dict_t *xattr_new = NULL;
+unwind:
+ WIPE(&local->preoldparent);
+ WIPE(&local->postoldparent);
+ WIPE(&local->preparent);
+ WIPE(&local->postparent);
- xattr_new = dict_copy_with_ref (xattr, NULL);
+ dht_rename_done(frame, this);
- gf_msg_trace (this->name, 0,
- "deleting old src linkfile %s @ %s",
- local->loc.path, src_hashed->name);
+ return 0;
+}
- DHT_MARKER_DONT_ACCOUNT(xattr_new);
+int
+dht_rename_links_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ xlator_t *prev = NULL;
+ dht_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
+
+ prev = cookie;
+ local = frame->local;
+ main_frame = local->main_frame;
+
+ /* TODO: Handle this case in lookup-optimize */
+ if (op_ret == -1) {
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_CREATE_LINK_FAILED,
+ "link/file %s on %s failed", local->loc.path, prev->name);
+ }
+
+ if (local->linked == _gf_true) {
+ local->linked = _gf_false;
+ dht_linkfile_attr_heal(frame, this);
+ }
+
+ dht_rename_unlink(main_frame, this);
+ DHT_STACK_DESTROY(frame);
+ return 0;
+}
- STACK_WIND (frame, dht_rename_unlink_cbk,
- src_hashed, src_hashed->fops->unlink,
- &local->loc, 0, xattr_new);
+int
+dht_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
+ xlator_t *src_cached = NULL;
+ xlator_t *dst_hashed = NULL;
+ xlator_t *dst_cached = NULL;
+ call_frame_t *link_frame = NULL;
+ dht_local_t *link_local = NULL;
+
+ local = frame->local;
+ prev = cookie;
+
+ src_cached = local->src_cached;
+ dst_hashed = local->dst_hashed;
+ dst_cached = local->dst_cached;
+
+ if (local->linked == _gf_true)
+ FRAME_SU_UNDO(frame, dht_local_t);
+
+ /* It is a critical failure iff we fail to rename the cached file
+ * if the rename of the linkto failed, it is not a critical failure,
+ * and we do not want to lose the created hard link for the new
+ * name as that could have been read by other clients.
+ *
+ * NOTE: If another client is attempting the same oldname -> newname
+ * rename, and finds both file names as existing, and are hard links
+ * to each other, then FUSE would send in an unlink for oldname. In
+ * this time duration if we treat the linkto as a critical error and
+ * unlink the newname we created, we would have effectively lost the
+ * file to rename operations.
+ *
+ * Repercussions of treating this as a non-critical error is that
+ * we could leave behind a stale linkto file and/or not create the new
+ * linkto file, the second case would be rectified by a subsequent
+ * lookup, the first case by a rebalance, like for all stale linkto
+ * files */
+
+ if (op_ret == -1) {
+ /* Critical failure: unable to rename the cached file */
+ if (prev == src_cached) {
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_RENAME_FAILED,
+ "%s: Rename on %s failed, (gfid = %s) ", local->loc.path,
+ prev->name,
+ local->loc.inode ? uuid_utoa(local->loc.inode->gfid) : "");
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ goto cleanup;
+ } else {
+ /* Non-critical failure, unable to rename the linkto
+ * file
+ */
+ gf_msg(this->name, GF_LOG_INFO, op_errno, DHT_MSG_RENAME_FAILED,
+ "%s: Rename (linkto file) on %s failed, "
+ "(gfid = %s) ",
+ local->loc.path, prev->name,
+ local->loc.inode ? uuid_utoa(local->loc.inode->gfid) : "");
+ }
+ }
+ if (xdata) {
+ if (!local->xattr)
+ local->xattr = dict_ref(xdata);
+ else
+ local->xattr = dict_copy_with_ref(xdata, local->xattr);
+ }
+
+ /* Merge attrs only from src_cached. In case there of src_cached !=
+ * dst_hashed, this ignores linkfile attrs. */
+ if (prev == src_cached) {
+ dht_iatt_merge(this, &local->stbuf, stbuf);
+ dht_iatt_merge(this, &local->preoldparent, preoldparent);
+ dht_iatt_merge(this, &local->postoldparent, postoldparent);
+ dht_iatt_merge(this, &local->preparent, prenewparent);
+ dht_iatt_merge(this, &local->postparent, postnewparent);
+ }
+
+ /* Create the linkto file for the dst file */
+ if ((src_cached == dst_cached) && (dst_hashed != dst_cached)) {
+ link_frame = copy_frame(frame);
+ if (!link_frame) {
+ goto unlink;
+ }
- dict_unref (xattr_new);
- xattr_new = NULL;
+ /* fop value sent as maxvalue because it is not used
+ * anywhere in this case */
+ link_local = dht_local_init(link_frame, &local->loc2, NULL,
+ GF_FOP_MAXVALUE);
+ if (!link_local) {
+ goto unlink;
}
- if (dst_cached
- && (dst_cached != dst_hashed)
- && (dst_cached != src_cached)) {
- gf_msg_trace (this->name, 0,
- "deleting old dst datafile %s @ %s",
- local->loc2.path, dst_cached->name);
+ if (link_local->loc.inode)
+ inode_unref(link_local->loc.inode);
+ link_local->loc.inode = inode_ref(local->loc.inode);
+ link_local->main_frame = frame;
+ link_local->stbuf = local->stbuf;
+ gf_uuid_copy(link_local->gfid, local->loc.inode->gfid);
- STACK_WIND (frame, dht_rename_unlink_cbk,
- dst_cached, dst_cached->fops->unlink,
- &local->loc2, 0, xattr);
- }
- if (xattr)
- dict_unref (xattr);
+ dht_linkfile_create(link_frame, dht_rename_links_create_cbk, this,
+ src_cached, dst_hashed, &link_local->loc);
return 0;
+ }
-unwind:
- WIPE (&local->preoldparent);
- WIPE (&local->postoldparent);
- WIPE (&local->preparent);
- WIPE (&local->postparent);
-
- dht_rename_done (frame, this);
+unlink:
- return 0;
+ if (link_frame) {
+ DHT_STACK_DESTROY(link_frame);
+ }
+ dht_rename_unlink(frame, this);
+ return 0;
cleanup:
- dht_rename_cleanup (frame);
+ dht_rename_cleanup(frame);
- return 0;
+ return 0;
}
-
int
-dht_do_rename (call_frame_t *frame)
+dht_do_rename(call_frame_t *frame)
{
- dht_local_t *local = NULL;
- xlator_t *dst_hashed = NULL;
- xlator_t *src_cached = NULL;
- xlator_t *dst_cached = NULL;
- xlator_t *this = NULL;
- xlator_t *rename_subvol = NULL;
- dict_t *dict = NULL;
-
- local = frame->local;
- this = frame->this;
-
- dst_hashed = local->dst_hashed;
- dst_cached = local->dst_cached;
- src_cached = local->src_cached;
-
- if (src_cached == dst_cached)
- rename_subvol = src_cached;
- else
- rename_subvol = dst_hashed;
-
- if ((src_cached != dst_hashed) && (rename_subvol == dst_hashed)) {
- DHT_MARKER_DONT_ACCOUNT(dict);
- }
-
- gf_msg_trace (this->name, 0,
- "renaming %s => %s (%s)",
- local->loc.path, local->loc2.path, rename_subvol->name);
-
- if (local->linked == _gf_true)
- FRAME_SU_DO (frame, dht_local_t);
- STACK_WIND (frame, dht_rename_cbk,
- rename_subvol, rename_subvol->fops->rename,
- &local->loc, &local->loc2, dict);
-
- return 0;
+ dht_local_t *local = NULL;
+ xlator_t *dst_hashed = NULL;
+ xlator_t *src_cached = NULL;
+ xlator_t *dst_cached = NULL;
+ xlator_t *this = NULL;
+ xlator_t *rename_subvol = NULL;
+
+ local = frame->local;
+ this = frame->this;
+
+ dst_hashed = local->dst_hashed;
+ dst_cached = local->dst_cached;
+ src_cached = local->src_cached;
+
+ if (src_cached == dst_cached)
+ rename_subvol = src_cached;
+ else
+ rename_subvol = dst_hashed;
+
+ if ((src_cached != dst_hashed) && (rename_subvol == dst_hashed)) {
+ DHT_MARKER_DONT_ACCOUNT(local->xattr_req);
+ }
+
+ if (rename_subvol == src_cached) {
+ DHT_CHANGELOG_TRACK_AS_RENAME(local->xattr_req, &local->loc,
+ &local->loc2);
+ }
+
+ gf_msg_trace(this->name, 0, "renaming %s => %s (%s)", local->loc.path,
+ local->loc2.path, rename_subvol->name);
+
+ if (local->linked == _gf_true)
+ FRAME_SU_DO(frame, dht_local_t);
+ STACK_WIND_COOKIE(frame, dht_rename_cbk, rename_subvol, rename_subvol,
+ rename_subvol->fops->rename, &local->loc, &local->loc2,
+ local->xattr_req);
+ return 0;
}
int
-dht_rename_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
+dht_rename_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
- local = frame->local;
- prev = cookie;
+ local = frame->local;
+ prev = cookie;
- if (op_ret == -1) {
- gf_msg_debug (this->name, 0,
- "link/file on %s failed (%s)",
- prev->this->name, strerror (op_errno));
- local->op_ret = -1;
- local->op_errno = op_errno;
- local->added_link = _gf_false;
- } else
- dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
+ if (op_ret == -1) {
+ gf_msg_debug(this->name, 0, "link/file on %s failed (%s)", prev->name,
+ strerror(op_errno));
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ local->added_link = _gf_false;
+ } else
+ dht_iatt_merge(this, &local->stbuf, stbuf);
- if (local->op_ret == -1)
- goto cleanup;
+ if (local->op_ret == -1)
+ goto cleanup;
- dht_do_rename (frame);
+ dht_do_rename(frame);
- return 0;
+ return 0;
cleanup:
- dht_rename_cleanup (frame);
+ dht_rename_cleanup(frame);
- return 0;
+ return 0;
}
int
-dht_rename_linkto_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
+dht_rename_linkto_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- xlator_t *src_cached = NULL;
- dict_t *xattr = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
+ xlator_t *src_cached = NULL;
+ dict_t *xattr = NULL;
- DHT_MARK_FOP_INTERNAL (xattr);
+ local = frame->local;
+ DHT_MARK_FOP_INTERNAL(xattr);
+ prev = cookie;
+ src_cached = local->src_cached;
- local = frame->local;
- prev = cookie;
+ if (op_ret == -1) {
+ gf_msg_debug(this->name, 0, "link/file on %s failed (%s)", prev->name,
+ strerror(op_errno));
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ }
- src_cached = local->src_cached;
+ /* If linkto creation failed move to failure cleanup code,
+ * instead of continuing with creating the link file */
+ if (local->op_ret != 0) {
+ goto cleanup;
+ }
- if (op_ret == -1) {
- gf_msg_debug (this->name, 0,
- "link/file on %s failed (%s)",
- prev->this->name, strerror (op_errno));
- local->op_ret = -1;
- local->op_errno = op_errno;
- }
+ gf_msg_trace(this->name, 0, "link %s => %s (%s)", local->loc.path,
+ local->loc2.path, src_cached->name);
+ if (gf_uuid_compare(local->loc.pargfid, local->loc2.pargfid) == 0) {
+ DHT_MARKER_DONT_ACCOUNT(xattr);
+ }
- /* If linkto creation failed move to failure cleanup code,
- * instead of continuing with creating the link file */
- if (local->op_ret != 0) {
- goto cleanup;
- }
-
- gf_msg_trace (this->name, 0,
- "link %s => %s (%s)", local->loc.path,
- local->loc2.path, src_cached->name);
- if (gf_uuid_compare (local->loc.pargfid,
- local->loc2.pargfid) == 0) {
- DHT_MARKER_DONT_ACCOUNT(xattr);
- }
+ local->added_link = _gf_true;
- local->added_link = _gf_true;
+ STACK_WIND_COOKIE(frame, dht_rename_link_cbk, src_cached, src_cached,
+ src_cached->fops->link, &local->loc, &local->loc2, xattr);
- STACK_WIND (frame, dht_rename_link_cbk,
- src_cached, src_cached->fops->link,
- &local->loc, &local->loc2, xattr);
+ if (xattr)
+ dict_unref(xattr);
- if (xattr)
- dict_unref (xattr);
-
- return 0;
+ return 0;
cleanup:
- dht_rename_cleanup (frame);
+ dht_rename_cleanup(frame);
- if (xattr)
- dict_unref (xattr);
+ if (xattr)
+ dict_unref(xattr);
- return 0;
+ return 0;
}
int
-dht_rename_unlink_links_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
+dht_rename_unlink_links_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
+ local = frame->local;
+ prev = cookie;
- local = frame->local;
- prev = cookie;
+ if ((op_ret == -1) && (op_errno != ENOENT)) {
+ gf_msg_debug(this->name, 0, "unlink of %s on %s failed (%s)",
+ local->loc2.path, prev->name, strerror(op_errno));
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ }
- if ((op_ret == -1) && (op_errno != ENOENT)) {
- gf_msg_debug (this->name, 0,
- "unlink of %s on %s failed (%s)",
- local->loc2.path, prev->this->name,
- strerror (op_errno));
- local->op_ret = -1;
- local->op_errno = op_errno;
- }
+ if (local->op_ret == -1)
+ goto cleanup;
- if (local->op_ret == -1)
- goto cleanup;
+ dht_do_rename(frame);
- dht_do_rename (frame);
-
- return 0;
+ return 0;
cleanup:
- dht_rename_cleanup (frame);
+ dht_rename_cleanup(frame);
- return 0;
+ return 0;
}
-
int
-dht_rename_create_links (call_frame_t *frame)
+dht_rename_create_links(call_frame_t *frame)
{
- dht_local_t *local = NULL;
- xlator_t *this = NULL;
- xlator_t *src_hashed = NULL;
- xlator_t *src_cached = NULL;
- xlator_t *dst_hashed = NULL;
- xlator_t *dst_cached = NULL;
- int call_cnt = 0;
- dict_t *xattr = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *this = NULL;
+ xlator_t *src_hashed = NULL;
+ xlator_t *src_cached = NULL;
+ xlator_t *dst_hashed = NULL;
+ xlator_t *dst_cached = NULL;
+ int call_cnt = 0;
+ dict_t *xattr = NULL;
+ local = frame->local;
+ this = frame->this;
- local = frame->local;
- this = frame->this;
+ src_hashed = local->src_hashed;
+ src_cached = local->src_cached;
+ dst_hashed = local->dst_hashed;
+ dst_cached = local->dst_cached;
- src_hashed = local->src_hashed;
- src_cached = local->src_cached;
- dst_hashed = local->dst_hashed;
- dst_cached = local->dst_cached;
+ DHT_MARK_FOP_INTERNAL(xattr);
- DHT_MARK_FOP_INTERNAL (xattr);
+ if (src_cached == dst_cached) {
+ dict_t *xattr_new = NULL;
- if (src_cached == dst_cached) {
- dict_t *xattr_new = NULL;
+ if (dst_hashed == dst_cached)
+ goto nolinks;
- if (dst_hashed == dst_cached)
- goto nolinks;
+ xattr_new = dict_copy_with_ref(xattr, NULL);
- xattr_new = dict_copy_with_ref (xattr, NULL);
+ gf_msg_trace(this->name, 0, "unlinking dst linkfile %s @ %s",
+ local->loc2.path, dst_hashed->name);
- gf_msg_trace (this->name, 0,
- "unlinking dst linkfile %s @ %s",
- local->loc2.path, dst_hashed->name);
+ DHT_MARKER_DONT_ACCOUNT(xattr_new);
- DHT_MARKER_DONT_ACCOUNT(xattr_new);
+ STACK_WIND_COOKIE(frame, dht_rename_unlink_links_cbk, dst_hashed,
+ dst_hashed, dst_hashed->fops->unlink, &local->loc2, 0,
+ xattr_new);
- STACK_WIND (frame, dht_rename_unlink_links_cbk,
- dst_hashed, dst_hashed->fops->unlink,
- &local->loc2, 0, xattr_new);
-
- dict_unref (xattr_new);
- return 0;
- }
+ dict_unref(xattr_new);
+ if (xattr)
+ dict_unref(xattr);
- if (src_cached != dst_hashed) {
- /* needed to create the link file */
- call_cnt++;
- if (dst_hashed != src_hashed)
- /* needed to create the linkto file */
- call_cnt ++;
+ return 0;
+ }
+
+ if (src_cached != dst_hashed) {
+ /* needed to create the link file */
+ call_cnt++;
+ if (dst_hashed != src_hashed)
+ /* needed to create the linkto file */
+ call_cnt++;
+ }
+
+ /* We should not have any failures post the link creation, as this
+ * introduces the newname into the namespace. Clients could have cached
+ * the existence of the newname and may start taking actions based on
+ * the same. Hence create the linkto first, and then attempt the link.
+ *
+ * NOTE: If another client is attempting the same oldname -> newname
+ * rename, and finds both file names as existing, and are hard links
+ * to each other, then FUSE would send in an unlink for oldname. In
+ * this time duration if we treat the linkto as a critical error and
+ * unlink the newname we created, we would have effectively lost the
+ * file to rename operations. */
+ if (dst_hashed != src_hashed && src_cached != dst_hashed) {
+ gf_msg_trace(this->name, 0, "linkfile %s @ %s => %s", local->loc.path,
+ dst_hashed->name, src_cached->name);
+
+ memcpy(local->gfid, local->loc.inode->gfid, 16);
+ dht_linkfile_create(frame, dht_rename_linkto_cbk, this, src_cached,
+ dst_hashed, &local->loc);
+ } else if (src_cached != dst_hashed) {
+ dict_t *xattr_new = NULL;
+
+ xattr_new = dict_copy_with_ref(xattr, NULL);
+
+ gf_msg_trace(this->name, 0, "link %s => %s (%s)", local->loc.path,
+ local->loc2.path, src_cached->name);
+ if (gf_uuid_compare(local->loc.pargfid, local->loc2.pargfid) == 0) {
+ DHT_MARKER_DONT_ACCOUNT(xattr_new);
}
- /* We should not have any failures post the link creation, as this
- * introduces the newname into the namespace. Clients could have cached
- * the existence of the newname and may start taking actions based on
- * the same. Hence create the linkto first, and then attempt the link.
- *
- * NOTE: If another client is attempting the same oldname -> newname
- * rename, and finds both file names as existing, and are hard links
- * to each other, then FUSE would send in an unlink for oldname. In
- * this time duration if we treat the linkto as a critical error and
- * unlink the newname we created, we would have effectively lost the
- * file to rename operations. */
- if (dst_hashed != src_hashed && src_cached != dst_hashed) {
- gf_msg_trace (this->name, 0,
- "linkfile %s @ %s => %s",
- local->loc.path, dst_hashed->name,
- src_cached->name);
-
- memcpy (local->gfid, local->loc.inode->gfid, 16);
- dht_linkfile_create (frame, dht_rename_linkto_cbk, this,
- src_cached, dst_hashed, &local->loc);
- } else if (src_cached != dst_hashed) {
- dict_t *xattr_new = NULL;
-
- xattr_new = dict_copy_with_ref (xattr, NULL);
-
- gf_msg_trace (this->name, 0,
- "link %s => %s (%s)", local->loc.path,
- local->loc2.path, src_cached->name);
- if (gf_uuid_compare (local->loc.pargfid,
- local->loc2.pargfid) == 0) {
- DHT_MARKER_DONT_ACCOUNT(xattr_new);
- }
-
- local->added_link = _gf_true;
+ local->added_link = _gf_true;
- STACK_WIND (frame, dht_rename_link_cbk,
- src_cached, src_cached->fops->link,
- &local->loc, &local->loc2, xattr_new);
+ STACK_WIND_COOKIE(frame, dht_rename_link_cbk, src_cached, src_cached,
+ src_cached->fops->link, &local->loc, &local->loc2,
+ xattr_new);
- dict_unref (xattr_new);
- }
+ dict_unref(xattr_new);
+ }
nolinks:
- if (!call_cnt) {
- /* skip to next step */
- dht_do_rename (frame);
- }
- if (xattr)
- dict_unref (xattr);
-
- return 0;
+ if (!call_cnt) {
+ /* skip to next step */
+ dht_do_rename(frame);
+ }
+ if (xattr)
+ dict_unref(xattr);
+
+ return 0;
}
int
-dht_rename_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- inode_t *inode, struct iatt *stbuf, dict_t *xattr,
- struct iatt *postparent)
+dht_rename_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent)
{
- dht_local_t *local = NULL;
- int call_cnt = 0;
- dht_conf_t *conf = NULL;
-
- local = frame->local;
- conf = this->private;
-
- if (op_ret < 0) {
- /* The meaning of is_linkfile is overloaded here. For locking
- * to work properly both rebalance and rename should acquire
- * lock on datafile. The reason for sending this lookup is to
- * find out whether we've acquired a lock on data file.
- * Between the lookup before rename and this rename, the
- * file could be migrated by a rebalance process and now this
- * file this might be a linkto file. We verify that by sending
- * this lookup. However, if this lookup fails we cannot really
- * say whether we've acquired lock on a datafile or linkto file.
- * So, we act conservatively and _assume_
- * that this is a linkfile and fail the rename operation.
- */
- local->is_linkfile = _gf_true;
- } else if (xattr && check_is_linkfile (inode, stbuf, xattr,
- conf->link_xattr_name)) {
- local->is_linkfile = _gf_true;
- }
-
- call_cnt = dht_frame_return (frame);
- if (is_last_call (call_cnt)) {
- if (local->is_linkfile) {
- local->op_ret = -1;
- local->op_errno = EBUSY;
- goto fail;
+ dht_local_t *local = NULL;
+ int call_cnt = 0;
+ dht_conf_t *conf = NULL;
+ char gfid_local[GF_UUID_BUF_SIZE] = {0};
+ char gfid_server[GF_UUID_BUF_SIZE] = {0};
+ int child_index = -1;
+ gf_boolean_t is_src = _gf_false;
+ loc_t *loc = NULL;
+
+ child_index = (long)cookie;
+
+ local = frame->local;
+ conf = this->private;
+
+ is_src = (child_index == 0);
+ if (is_src)
+ loc = &local->loc;
+ else
+ loc = &local->loc2;
+
+ if (op_ret >= 0) {
+ if (is_src)
+ local->src_cached = dht_subvol_get_cached(this, local->loc.inode);
+ else {
+ if (loc->inode)
+ gf_uuid_unparse(loc->inode->gfid, gfid_local);
+
+ gf_msg_debug(this->name, 0,
+ "dst_cached before lookup: %s, "
+ "(path:%s)(gfid:%s),",
+ local->loc2.path,
+ local->dst_cached ? local->dst_cached->name : NULL,
+ local->dst_cached ? gfid_local : NULL);
+
+ local->dst_cached = dht_subvol_get_cached(this,
+ local->loc2_copy.inode);
+
+ gf_uuid_unparse(stbuf->ia_gfid, gfid_local);
+
+ gf_msg_debug(this->name, GF_LOG_WARNING,
+ "dst_cached after lookup: %s, "
+ "(path:%s)(gfid:%s)",
+ local->loc2.path,
+ local->dst_cached ? local->dst_cached->name : NULL,
+ local->dst_cached ? gfid_local : NULL);
+
+ if ((local->loc2.inode == NULL) ||
+ gf_uuid_compare(stbuf->ia_gfid, local->loc2.inode->gfid)) {
+ if (local->loc2.inode != NULL) {
+ inode_unlink(local->loc2.inode, local->loc2.parent,
+ local->loc2.name);
+ inode_unref(local->loc2.inode);
}
- dht_rename_create_links (frame);
+ local->loc2.inode = inode_link(local->loc2_copy.inode,
+ local->loc2_copy.parent,
+ local->loc2_copy.name, stbuf);
+ gf_uuid_copy(local->loc2.gfid, stbuf->ia_gfid);
+ }
+ }
+ }
+
+ if (op_ret < 0) {
+ if (is_src) {
+ /* The meaning of is_linkfile is overloaded here. For locking
+ * to work properly both rebalance and rename should acquire
+ * lock on datafile. The reason for sending this lookup is to
+ * find out whether we've acquired a lock on data file.
+ * Between the lookup before rename and this rename, the
+ * file could be migrated by a rebalance process and now this
+ * file this might be a linkto file. We verify that by sending
+ * this lookup. However, if this lookup fails we cannot really
+ * say whether we've acquired lock on a datafile or linkto file.
+ * So, we act conservatively and _assume_
+ * that this is a linkfile and fail the rename operation.
+ */
+ local->is_linkfile = _gf_true;
+ local->op_errno = op_errno;
+ } else {
+ if (local->dst_cached)
+ gf_msg_debug(this->name, op_errno,
+ "file %s (gfid:%s) was present "
+ "(hashed-subvol=%s, "
+ "cached-subvol=%s) before rename,"
+ " but lookup failed",
+ local->loc2.path,
+ uuid_utoa(local->loc2.inode->gfid),
+ local->dst_hashed->name, local->dst_cached->name);
+ if (dht_inode_missing(op_errno))
+ local->dst_cached = NULL;
+ }
+ } else if (is_src && xattr &&
+ check_is_linkfile(inode, stbuf, xattr, conf->link_xattr_name)) {
+ local->is_linkfile = _gf_true;
+ /* Found linkto file instead of data file, passdown ENOENT
+ * based on the above comment */
+ local->op_errno = ENOENT;
+ }
+
+ if (!local->is_linkfile && (op_ret >= 0) &&
+ gf_uuid_compare(loc->gfid, stbuf->ia_gfid)) {
+ gf_uuid_unparse(loc->gfid, gfid_local);
+ gf_uuid_unparse(stbuf->ia_gfid, gfid_server);
+
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_GFID_MISMATCH,
+ "path:%s, received a different gfid, local_gfid= %s"
+ " server_gfid: %s",
+ local->loc.path, gfid_local, gfid_server);
+
+ /* Will passdown ENOENT anyway since the file we sent on
+ * rename is replaced with a different file */
+ local->op_errno = ENOENT;
+ /* Since local->is_linkfile is used here to detect failure,
+ * marking this to true */
+ local->is_linkfile = _gf_true;
+ }
+
+ call_cnt = dht_frame_return(frame);
+ if (is_last_call(call_cnt)) {
+ if (local->is_linkfile) {
+ local->op_ret = -1;
+ goto fail;
}
- return 0;
+ dht_rename_create_links(frame);
+ }
+
+ return 0;
fail:
- dht_rename_unlock (frame, this);
- return 0;
+ dht_rename_unlock(frame, this);
+ return 0;
}
-int32_t
-dht_rename_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+int
+dht_rename_file_lock1_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- dht_local_t *local = NULL;
- char src_gfid[GF_UUID_BUF_SIZE] = {0};
- char dst_gfid[GF_UUID_BUF_SIZE] = {0};
- dict_t *xattr_req = NULL;
- dht_conf_t *conf = NULL;
- int i = 0;
-
- local = frame->local;
- conf = this->private;
-
- if (op_ret < 0) {
- uuid_utoa_r (local->loc.inode->gfid, src_gfid);
-
- if (local->loc2.inode)
- uuid_utoa_r (local->loc2.inode->gfid, dst_gfid);
-
- gf_log (this->name, GF_LOG_WARNING,
- "acquiring inodelk failed (%s) "
- "rename (%s:%s:%s %s:%s:%s), returning EBUSY",
- strerror (op_errno),
- local->loc.path, src_gfid, local->src_cached->name,
- local->loc2.path, dst_gfid,
- local->dst_cached ? local->dst_cached->name : NULL);
-
- local->op_ret = -1;
- local->op_errno = (op_errno == EAGAIN) ? EBUSY : op_errno;
-
- goto done;
- }
+ dht_local_t *local = NULL;
+ char src_gfid[GF_UUID_BUF_SIZE] = {0};
+ char dst_gfid[GF_UUID_BUF_SIZE] = {0};
+ int ret = 0;
+ loc_t *loc = NULL;
+ xlator_t *subvol = NULL;
+
+ local = frame->local;
+
+ if (op_ret < 0) {
+ uuid_utoa_r(local->loc.inode->gfid, src_gfid);
+
+ if (local->loc2.inode)
+ uuid_utoa_r(local->loc2.inode->gfid, dst_gfid);
+
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_INODE_LK_ERROR,
+ "protecting namespace of %s failed"
+ "rename (%s:%s:%s %s:%s:%s)",
+ local->current == &local->lock[0] ? local->loc.path
+ : local->loc2.path,
+ local->loc.path, src_gfid, local->src_hashed->name,
+ local->loc2.path, dst_gfid,
+ local->dst_hashed ? local->dst_hashed->name : NULL);
+
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ goto err;
+ }
+
+ if (local->current == &local->lock[0]) {
+ loc = &local->loc2;
+ subvol = local->dst_hashed;
+ local->current = &local->lock[1];
+ } else {
+ loc = &local->loc;
+ subvol = local->src_hashed;
+ local->current = &local->lock[0];
+ }
+
+ ret = dht_protect_namespace(frame, loc, subvol, &local->current->ns,
+ dht_rename_lock_cbk);
+ if (ret < 0) {
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ return 0;
+err:
+ /* No harm in calling an extra unlock */
+ dht_rename_unlock(frame, this);
+ return 0;
+}
- xattr_req = dict_new ();
- if (xattr_req == NULL) {
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- goto done;
- }
+int32_t
+dht_rename_file_protect_namespace(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ char src_gfid[GF_UUID_BUF_SIZE] = {0};
+ char dst_gfid[GF_UUID_BUF_SIZE] = {0};
+ int ret = 0;
+ loc_t *loc = NULL;
+ xlator_t *subvol = NULL;
+
+ local = frame->local;
+
+ if (op_ret < 0) {
+ uuid_utoa_r(local->loc.inode->gfid, src_gfid);
+
+ if (local->loc2.inode)
+ uuid_utoa_r(local->loc2.inode->gfid, dst_gfid);
+
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_INODE_LK_ERROR,
+ "acquiring inodelk failed "
+ "rename (%s:%s:%s %s:%s:%s)",
+ local->loc.path, src_gfid, local->src_cached->name,
+ local->loc2.path, dst_gfid,
+ local->dst_cached ? local->dst_cached->name : NULL);
+
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+
+ goto err;
+ }
+
+ /* Locks on src and dst needs to ordered which otherwise might cause
+ * deadlocks when rename (src, dst) and rename (dst, src) is done from
+ * two different clients
+ */
+ ret = dht_order_rename_lock(frame, &loc, &subvol);
+ if (ret) {
+ local->op_errno = ENOMEM;
+ goto err;
+ }
+
+ ret = dht_protect_namespace(frame, loc, subvol, &local->current->ns,
+ dht_rename_file_lock1_cbk);
+ if (ret < 0) {
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ return 0;
- op_ret = dict_set_uint32 (xattr_req,
- conf->link_xattr_name, 256);
- if (op_ret < 0) {
- local->op_ret = -1;
- local->op_errno = -op_ret;
- goto done;
- }
+err:
+ /* Its fine to call unlock even when no locks are acquired, as we check
+ * for lock->locked before winding a unlock call.
+ */
+ dht_rename_unlock(frame, this);
- local->call_cnt = local->lock.lk_count;
+ return 0;
+}
- for (i = 0; i < local->lock.lk_count; i++) {
- STACK_WIND (frame, dht_rename_lookup_cbk,
- local->lock.locks[i]->xl,
- local->lock.locks[i]->xl->fops->lookup,
- &local->lock.locks[i]->loc, xattr_req);
+int32_t
+dht_rename_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ char src_gfid[GF_UUID_BUF_SIZE] = {0};
+ char dst_gfid[GF_UUID_BUF_SIZE] = {0};
+ dict_t *xattr_req = NULL;
+ dht_conf_t *conf = NULL;
+ int i = 0;
+ xlator_t *subvol = NULL;
+ dht_lock_t *lock = NULL;
+
+ local = frame->local;
+ conf = this->private;
+
+ if (op_ret < 0) {
+ uuid_utoa_r(local->loc.inode->gfid, src_gfid);
+
+ if (local->loc2.inode)
+ uuid_utoa_r(local->loc2.inode->gfid, dst_gfid);
+
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_INODE_LK_ERROR,
+ "protecting namespace of %s failed. "
+ "rename (%s:%s:%s %s:%s:%s)",
+ local->current == &local->lock[0] ? local->loc.path
+ : local->loc2.path,
+ local->loc.path, src_gfid, local->src_hashed->name,
+ local->loc2.path, dst_gfid,
+ local->dst_hashed ? local->dst_hashed->name : NULL);
+
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+
+ goto done;
+ }
+
+ xattr_req = dict_new();
+ if (xattr_req == NULL) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto done;
+ }
+
+ op_ret = dict_set_uint32(xattr_req, conf->link_xattr_name, 256);
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = -op_ret;
+ goto done;
+ }
+
+ /* dst_cached might've changed. This normally happens for two reasons:
+ * 1. rebalance migrated dst
+ * 2. Another parallel rename was done overwriting dst
+ *
+ * Doing a lookup on local->loc2 when dst exists, but is associated
+ * with a different gfid will result in an ESTALE error. So, do a fresh
+ * lookup with a new inode on dst-path and handle change of dst-cached
+ * in the cbk. Also, to identify dst-cached changes we do a lookup on
+ * "this" rather than the subvol.
+ */
+ loc_copy(&local->loc2_copy, &local->loc2);
+ inode_unref(local->loc2_copy.inode);
+ local->loc2_copy.inode = inode_new(local->loc.inode->table);
+
+ /* Why not use local->lock.locks[?].loc for lookup post lock phase
+ * ---------------------------------------------------------------
+ * "layout.parent_layout.locks[?].loc" does not have the name and pargfid
+ * populated.
+ * Reason: If we had populated the name and pargfid, server might
+ * resolve to a successful lookup even if there is a file with same name
+ * with a different gfid(unlink & create) as server does name based
+ * resolution on first priority. And this can result in operating on a
+ * different inode entirely.
+ *
+ * Now consider a scenario where source file was renamed by some other
+ * client to a new name just before this lock was granted. So if a
+ * lookup would be done on local->lock[0].layout.parent_layout.locks[?].loc,
+ * server will send success even if the entry was renamed (since server will
+ * do a gfid based resolution). So once a lock is granted, make sure the
+ * file exists with the name that the client requested with.
+ * */
+
+ local->call_cnt = 2;
+ for (i = 0; i < 2; i++) {
+ if (i == 0) {
+ lock = local->rename_inodelk_backward_compatible[0];
+ if (gf_uuid_compare(local->loc.gfid, lock->loc.gfid) == 0)
+ subvol = lock->xl;
+ else {
+ lock = local->rename_inodelk_backward_compatible[1];
+ subvol = lock->xl;
+ }
+ } else {
+ subvol = this;
}
- dict_unref (xattr_req);
- return 0;
+ STACK_WIND_COOKIE(frame, dht_rename_lookup_cbk, (void *)(long)i, subvol,
+ subvol->fops->lookup,
+ (i == 0) ? &local->loc : &local->loc2_copy,
+ xattr_req);
+ }
+
+ dict_unref(xattr_req);
+ return 0;
done:
- /* Its fine to call unlock even when no locks are acquired, as we check
- * for lock->locked before winding a unlock call.
- */
- dht_rename_unlock (frame, this);
+ /* Its fine to call unlock even when no locks are acquired, as we check
+ * for lock->locked before winding a unlock call.
+ */
+ dht_rename_unlock(frame, this);
- if (xattr_req)
- dict_unref (xattr_req);
+ if (xattr_req)
+ dict_unref(xattr_req);
- return 0;
+ return 0;
}
int
-dht_rename_lock (call_frame_t *frame)
+dht_rename_lock(call_frame_t *frame)
{
- dht_local_t *local = NULL;
- int count = 1, ret = -1;
- dht_lock_t **lk_array = NULL;
-
- local = frame->local;
-
- if (local->dst_cached)
- count++;
-
- lk_array = GF_CALLOC (count, sizeof (*lk_array), gf_common_mt_char);
- if (lk_array == NULL)
- goto err;
-
- lk_array[0] = dht_lock_new (frame->this, local->src_cached, &local->loc,
- F_WRLCK, DHT_FILE_MIGRATE_DOMAIN);
- if (lk_array[0] == NULL)
- goto err;
-
- if (local->dst_cached) {
- lk_array[1] = dht_lock_new (frame->this, local->dst_cached,
- &local->loc2, F_WRLCK,
- DHT_FILE_MIGRATE_DOMAIN);
- if (lk_array[1] == NULL)
- goto err;
- }
-
- local->lock.locks = lk_array;
- local->lock.lk_count = count;
-
- ret = dht_nonblocking_inodelk (frame, lk_array, count,
- dht_rename_lock_cbk);
- if (ret < 0) {
- local->lock.locks = NULL;
- local->lock.lk_count = 0;
- goto err;
- }
-
- return 0;
+ dht_local_t *local = NULL;
+ int count = 1, ret = -1;
+ dht_lock_t **lk_array = NULL;
+
+ local = frame->local;
+
+ if (local->dst_cached)
+ count++;
+
+ lk_array = GF_CALLOC(count, sizeof(*lk_array), gf_common_mt_pointer);
+ if (lk_array == NULL)
+ goto err;
+
+ lk_array[0] = dht_lock_new(frame->this, local->src_cached, &local->loc,
+ F_WRLCK, DHT_FILE_MIGRATE_DOMAIN, NULL,
+ FAIL_ON_ANY_ERROR);
+ if (lk_array[0] == NULL)
+ goto err;
+
+ if (local->dst_cached) {
+ /* dst might be removed by the time inodelk reaches bricks,
+ * which can result in ESTALE errors. POSIX imposes no
+ * restriction for dst to be present for renames to be
+ * successful. So, we'll ignore ESTALE errors. As far as
+ * synchronization on dst goes, we'll achieve the same by
+ * holding entrylk on parent directory of dst in the namespace
+ * of basename(dst). Also, there might not be quorum in cluster
+ * xlators like EC/disperse on errno, in which case they return
+ * EIO. For eg., in a disperse (4 + 2), 3 might return success
+ * and three might return ESTALE. Disperse, having no Quorum
+ * unwinds inodelk with EIO. So, ignore EIO too.
+ */
+ lk_array[1] = dht_lock_new(frame->this, local->dst_cached, &local->loc2,
+ F_WRLCK, DHT_FILE_MIGRATE_DOMAIN, NULL,
+ IGNORE_ENOENT_ESTALE_EIO);
+ if (lk_array[1] == NULL)
+ goto err;
+ }
+
+ local->rename_inodelk_backward_compatible = lk_array;
+ local->rename_inodelk_bc_count = count;
+
+ /* retaining inodelks for the sake of backward compatibility. Please
+ * make sure to remove this inodelk once all of 3.10, 3.12 and 3.13
+ * reach EOL. Better way of getting synchronization would be to acquire
+ * entrylks on src and dst parent directories in the namespace of
+ * basenames of src and dst
+ */
+ ret = dht_blocking_inodelk(frame, lk_array, count,
+ dht_rename_file_protect_namespace);
+ if (ret < 0) {
+ local->rename_inodelk_backward_compatible = NULL;
+ local->rename_inodelk_bc_count = 0;
+ goto err;
+ }
+
+ return 0;
err:
- if (lk_array != NULL) {
- int tmp_count = 0, i = 0;
+ if (lk_array != NULL) {
+ int tmp_count = 0, i = 0;
- for (i = 0; (i < count) && (lk_array[i]); i++, tmp_count++);
+ for (i = 0; (i < count) && (lk_array[i]); i++, tmp_count++)
+ ;
- dht_lock_array_free (lk_array, tmp_count);
- GF_FREE (lk_array);
- }
+ dht_lock_array_free(lk_array, tmp_count);
+ GF_FREE(lk_array);
+ }
- return -1;
+ return -1;
}
int
-dht_rename (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+dht_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
{
- xlator_t *src_cached = NULL;
- xlator_t *src_hashed = NULL;
- xlator_t *dst_cached = NULL;
- xlator_t *dst_hashed = NULL;
- int op_errno = -1;
- int ret = -1;
- dht_local_t *local = NULL;
- char gfid[GF_UUID_BUF_SIZE] = {0};
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (oldloc, err);
- VALIDATE_OR_GOTO (newloc, err);
-
- gf_uuid_unparse(oldloc->inode->gfid, gfid);
-
- src_hashed = dht_subvol_get_hashed (this, oldloc);
- if (!src_hashed) {
- gf_msg (this->name, GF_LOG_INFO, 0,
- DHT_MSG_RENAME_FAILED,
- "No hashed subvolume in layout for path=%s,"
- "(gfid = %s)", oldloc->path, gfid);
- op_errno = EINVAL;
- goto err;
- }
-
- src_cached = dht_subvol_get_cached (this, oldloc->inode);
- if (!src_cached) {
- gf_msg (this->name, GF_LOG_INFO, 0,
- DHT_MSG_RENAME_FAILED,
- "No cached subvolume for path = %s,"
- "(gfid = %s)", oldloc->path, gfid);
-
- op_errno = EINVAL;
- goto err;
- }
-
- dst_hashed = dht_subvol_get_hashed (this, newloc);
- if (!dst_hashed) {
- gf_msg (this->name, GF_LOG_INFO, 0,
- DHT_MSG_RENAME_FAILED,
- "No hashed subvolume in layout for path=%s",
- newloc->path);
- op_errno = EINVAL;
- goto err;
+ xlator_t *src_cached = NULL;
+ xlator_t *src_hashed = NULL;
+ xlator_t *dst_cached = NULL;
+ xlator_t *dst_hashed = NULL;
+ int op_errno = -1;
+ int ret = -1;
+ dht_local_t *local = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+ char newgfid[GF_UUID_BUF_SIZE] = {0};
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(oldloc, err);
+ VALIDATE_OR_GOTO(newloc, err);
+
+ gf_uuid_unparse(oldloc->inode->gfid, gfid);
+
+ src_hashed = dht_subvol_get_hashed(this, oldloc);
+ if (!src_hashed) {
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_RENAME_FAILED,
+ "No hashed subvolume in layout for path=%s,"
+ "(gfid = %s)",
+ oldloc->path, gfid);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ src_cached = dht_subvol_get_cached(this, oldloc->inode);
+ if (!src_cached) {
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_RENAME_FAILED,
+ "No cached subvolume for path = %s,"
+ "(gfid = %s)",
+ oldloc->path, gfid);
+
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ dst_hashed = dht_subvol_get_hashed(this, newloc);
+ if (!dst_hashed) {
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_RENAME_FAILED,
+ "No hashed subvolume in layout for path=%s", newloc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (newloc->inode)
+ dst_cached = dht_subvol_get_cached(this, newloc->inode);
+
+ local = dht_local_init(frame, oldloc, NULL, GF_FOP_RENAME);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ /* cached_subvol will be set from dht_local_init, reset it to NULL,
+ as the logic of handling rename is different */
+ local->cached_subvol = NULL;
+
+ ret = loc_copy(&local->loc2, newloc);
+ if (ret == -1) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->src_hashed = src_hashed;
+ local->src_cached = src_cached;
+ local->dst_hashed = dst_hashed;
+ local->dst_cached = dst_cached;
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
+
+ if (newloc->inode)
+ gf_uuid_unparse(newloc->inode->gfid, newgfid);
+
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_RENAME_INFO,
+ "renaming %s (%s) (hash=%s/cache=%s) => %s (%s) "
+ "(hash=%s/cache=%s) ",
+ oldloc->path, gfid, src_hashed->name, src_cached->name, newloc->path,
+ newloc->inode ? newgfid : NULL, dst_hashed->name,
+ dst_cached ? dst_cached->name : "<nul>");
+
+ if (IA_ISDIR(oldloc->inode->ia_type)) {
+ dht_rename_dir(frame, this);
+ } else {
+ local->op_ret = 0;
+ ret = dht_rename_lock(frame);
+ if (ret < 0) {
+ op_errno = ENOMEM;
+ goto err;
}
+ }
- if (newloc->inode)
- dst_cached = dht_subvol_get_cached (this, newloc->inode);
+ return 0;
- local = dht_local_init (frame, oldloc, NULL, GF_FOP_RENAME);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- /* cached_subvol will be set from dht_local_init, reset it to NULL,
- as the logic of handling rename is different */
- local->cached_subvol = NULL;
-
- ret = loc_copy (&local->loc2, newloc);
- if (ret == -1) {
- op_errno = ENOMEM;
- goto err;
- }
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(rename, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL,
+ NULL);
- local->src_hashed = src_hashed;
- local->src_cached = src_cached;
- local->dst_hashed = dst_hashed;
- local->dst_cached = dst_cached;
+ return 0;
+}
- gf_log (this->name, GF_LOG_INFO,
- "renaming %s (hash=%s/cache=%s) => %s (hash=%s/cache=%s)",
- oldloc->path, src_hashed->name, src_cached->name,
- newloc->path, dst_hashed->name,
- dst_cached ? dst_cached->name : "<nul>");
+int
+dht_pt_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+{
+ gf_boolean_t free_xdata = _gf_false;
- if (IA_ISDIR (oldloc->inode->ia_type)) {
- dht_rename_dir (frame, this);
- } else {
- local->op_ret = 0;
- ret = dht_rename_lock (frame);
- if (ret < 0)
- goto err;
+ /* Just a pass through */
+ if (!IA_ISDIR(oldloc->inode->ia_type)) {
+ if (!xdata) {
+ free_xdata = _gf_true;
}
-
- return 0;
-
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL, NULL, NULL,
- NULL, NULL);
-
- return 0;
+ DHT_CHANGELOG_TRACK_AS_RENAME(xdata, oldloc, newloc);
+ }
+ default_rename(frame, this, oldloc, newloc, xdata);
+ if (free_xdata && xdata) {
+ dict_unref(xdata);
+ xdata = NULL;
+ }
+ return 0;
}
diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c
index 260c50083df..3e24065227c 100644
--- a/xlators/cluster/dht/src/dht-selfheal.c
+++ b/xlators/cluster/dht/src/dht-selfheal.c
@@ -8,1929 +8,2593 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-
-#include "glusterfs.h"
-#include "xlator.h"
-#include "dht-common.h"
-#include "dht-messages.h"
-#include "glusterfs-acl.h"
-
-#define DHT_SET_LAYOUT_RANGE(layout,i,srt,chunk,path) do { \
- layout->list[i].start = srt; \
- layout->list[i].stop = srt + chunk - 1; \
- \
- gf_msg_trace (this->name, 0, \
- "gave fix: %u - %u on %s for %s", \
- layout->list[i].start, \
- layout->list[i].stop, \
- layout->list[i].xlator->name, path); \
- } while (0)
-
-#define DHT_RESET_LAYOUT_RANGE(layout) do { \
- int cnt = 0; \
- for (cnt = 0; cnt < layout->cnt; cnt++ ) { \
- layout->list[cnt].start = 0; \
- layout->list[cnt].stop = 0; \
- } \
- } while (0)
+#include "dht-lock.h"
+
+#define DHT_SET_LAYOUT_RANGE(layout, i, srt, chunk, path) \
+ do { \
+ layout->list[i].start = srt; \
+ layout->list[i].stop = srt + chunk - 1; \
+ layout->list[i].commit_hash = layout->commit_hash; \
+ \
+ gf_msg_trace(this->name, 0, \
+ "gave fix: 0x%x - 0x%x, with commit-hash 0x%x" \
+ " on %s for %s", \
+ layout->list[i].start, layout->list[i].stop, \
+ layout->list[i].commit_hash, \
+ layout->list[i].xlator->name, path); \
+ } while (0)
+
+#define DHT_RESET_LAYOUT_RANGE(layout) \
+ do { \
+ int cnt = 0; \
+ for (cnt = 0; cnt < layout->cnt; cnt++) { \
+ layout->list[cnt].start = 0; \
+ layout->list[cnt].stop = 0; \
+ } \
+ } while (0)
-int
-dht_selfheal_layout_lock (call_frame_t *frame, dht_layout_t *layout,
- gf_boolean_t newdir,
- dht_selfheal_layout_t healer,
- dht_need_heal_t should_heal);
+static int
+dht_selfheal_layout_lock(call_frame_t *frame, dht_layout_t *layout,
+ gf_boolean_t newdir, dht_selfheal_layout_t healer,
+ dht_need_heal_t should_heal);
static uint32_t
-dht_overlap_calc (dht_layout_t *old, int o, dht_layout_t *new, int n)
+dht_overlap_calc(dht_layout_t *old, int o, dht_layout_t *new, int n)
{
- if (o >= old->cnt || n >= new->cnt)
- return 0;
+ if (o >= old->cnt || n >= new->cnt)
+ return 0;
- if (old->list[o].err > 0 || new->list[n].err > 0)
- return 0;
+ if (old->list[o].err > 0 || new->list[n].err > 0)
+ return 0;
- if (old->list[o].start == old->list[o].stop) {
- return 0;
- }
+ if (old->list[o].start == old->list[o].stop) {
+ return 0;
+ }
- if (new->list[n].start == new->list[n].stop) {
- return 0;
- }
+ if (new->list[n].start == new->list[n].stop) {
+ return 0;
+ }
- if ((old->list[o].start > new->list[n].stop) ||
- (old->list[o].stop < new->list[n].start))
- return 0;
+ if ((old->list[o].start > new->list[n].stop) ||
+ (old->list[o].stop < new->list[n].start))
+ return 0;
- return min (old->list[o].stop, new->list[n].stop) -
- max (old->list[o].start, new->list[n].start) + 1;
+ return min(old->list[o].stop, new->list[n].stop) -
+ max(old->list[o].start, new->list[n].start) + 1;
}
int
-dht_selfheal_unlock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+dht_selfheal_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- DHT_STACK_DESTROY (frame);
- return 0;
+ DHT_STACK_DESTROY(frame);
+ return 0;
}
int
-dht_selfheal_dir_finish (call_frame_t *frame, xlator_t *this, int ret)
+dht_selfheal_dir_finish(call_frame_t *frame, xlator_t *this, int ret,
+ int invoke_cbk)
{
- dht_local_t *local = NULL, *lock_local = NULL;
- call_frame_t *lock_frame = NULL;
- int lock_count = 0;
+ dht_local_t *local = NULL, *lock_local = NULL;
+ call_frame_t *lock_frame = NULL;
+ int lock_count = 0;
- local = frame->local;
- lock_count = dht_lock_count (local->lock.locks, local->lock.lk_count);
+ local = frame->local;
- if (lock_count == 0)
- goto done;
+ /* Unlock entrylk */
+ dht_unlock_entrylk_wrapper(frame, &local->lock[0].ns.directory_ns);
- lock_frame = copy_frame (frame);
- if (lock_frame == NULL) {
- goto done;
- }
+ /* Unlock inodelk */
+ lock_count = dht_lock_count(local->lock[0].ns.parent_layout.locks,
+ local->lock[0].ns.parent_layout.lk_count);
+ if (lock_count == 0)
+ goto done;
- lock_local = dht_local_init (lock_frame, &local->loc, NULL,
- lock_frame->root->op);
- if (lock_local == NULL) {
- goto done;
- }
+ lock_frame = copy_frame(frame);
+ if (lock_frame == NULL) {
+ goto done;
+ }
- lock_local->lock.locks = local->lock.locks;
- lock_local->lock.lk_count = local->lock.lk_count;
+ lock_local = dht_local_init(lock_frame, &local->loc, NULL,
+ lock_frame->root->op);
+ if (lock_local == NULL) {
+ goto done;
+ }
- local->lock.locks = NULL;
- local->lock.lk_count = 0;
+ lock_local->lock[0].ns.parent_layout.locks = local->lock[0]
+ .ns.parent_layout.locks;
+ lock_local->lock[0]
+ .ns.parent_layout.lk_count = local->lock[0].ns.parent_layout.lk_count;
- dht_unlock_inodelk (lock_frame, lock_local->lock.locks,
- lock_local->lock.lk_count,
- dht_selfheal_unlock_cbk);
- lock_frame = NULL;
+ local->lock[0].ns.parent_layout.locks = NULL;
+ local->lock[0].ns.parent_layout.lk_count = 0;
-done:
- local->selfheal.dir_cbk (frame, NULL, frame->this, ret,
- local->op_errno, NULL);
- if (lock_frame != NULL) {
- DHT_STACK_DESTROY (lock_frame);
- }
+ dht_unlock_inodelk(lock_frame, lock_local->lock[0].ns.parent_layout.locks,
+ lock_local->lock[0].ns.parent_layout.lk_count,
+ dht_selfheal_unlock_cbk);
+ lock_frame = NULL;
- return 0;
+done:
+ if (invoke_cbk)
+ local->selfheal.dir_cbk(frame, NULL, frame->this, ret, local->op_errno,
+ NULL);
+ if (lock_frame != NULL) {
+ DHT_STACK_DESTROY(lock_frame);
+ }
+
+ return 0;
}
int
-dht_refresh_layout_done (call_frame_t *frame)
+dht_refresh_layout_done(call_frame_t *frame)
{
- int ret = -1;
- dht_layout_t *refreshed = NULL, *heal = NULL;
- dht_local_t *local = NULL;
- dht_need_heal_t should_heal = NULL;
- dht_selfheal_layout_t healer = NULL;
+ int ret = -1;
+ dht_layout_t *refreshed = NULL, *heal = NULL;
+ dht_local_t *local = NULL;
+ dht_need_heal_t should_heal = NULL;
+ dht_selfheal_layout_t healer = NULL;
- local = frame->local;
+ local = frame->local;
- refreshed = local->selfheal.refreshed_layout;
- heal = local->selfheal.layout;
+ refreshed = local->selfheal.refreshed_layout;
+ heal = local->selfheal.layout;
- healer = local->selfheal.healer;
- should_heal = local->selfheal.should_heal;
+ healer = local->selfheal.healer;
+ should_heal = local->selfheal.should_heal;
- ret = dht_layout_sort (refreshed);
- if (ret == -1) {
- gf_log (frame->this->name, GF_LOG_WARNING,
- "sorting the layout failed");
- goto err;
- }
+ ret = dht_layout_sort(refreshed);
+ if (ret == -1) {
+ gf_smsg(frame->this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_LAYOUT_SORT_FAILED, NULL);
+ goto err;
+ }
- if (should_heal (frame, &heal, &refreshed)) {
- healer (frame, &local->loc, heal);
- } else {
- local->selfheal.layout = NULL;
- local->selfheal.refreshed_layout = NULL;
- local->selfheal.layout = refreshed;
+ if (should_heal(frame, &heal, &refreshed)) {
+ healer(frame, &local->loc, heal);
+ } else {
+ local->selfheal.layout = NULL;
+ local->selfheal.refreshed_layout = NULL;
+ local->selfheal.layout = refreshed;
- dht_layout_unref (frame->this, heal);
+ dht_layout_unref(frame->this, heal);
- dht_selfheal_dir_finish (frame, frame->this, 0);
- }
+ dht_selfheal_dir_finish(frame, frame->this, 0, 1);
+ }
- return 0;
+ return 0;
err:
- dht_selfheal_dir_finish (frame, frame->this, -1);
- return 0;
+ dht_selfheal_dir_finish(frame, frame->this, -1, 1);
+ return 0;
}
int
-dht_refresh_layout_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, inode_t *inode,
- struct iatt *stbuf, dict_t *xattr,
- struct iatt *postparent)
+dht_refresh_layout_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent)
{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
- dht_layout_t *layout = NULL;
-
- GF_VALIDATE_OR_GOTO ("dht", frame, err);
- GF_VALIDATE_OR_GOTO ("dht", this, err);
- GF_VALIDATE_OR_GOTO ("dht", frame->local, err);
- GF_VALIDATE_OR_GOTO ("dht", this->private, err);
-
- local = frame->local;
- prev = cookie;
-
- layout = local->selfheal.refreshed_layout;
-
- LOCK (&frame->lock);
- {
- op_ret = dht_layout_merge (this, layout, prev->this,
- op_ret, op_errno, xattr);
-
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_msg_debug (this->name, 0,
- "lookup of %s on %s returned error (%s)",
- local->loc.path, prev->this->name,
- strerror (op_errno));
-
- goto unlock;
- }
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ xlator_t *prev = NULL;
+ dht_layout_t *layout = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {
+ 0,
+ };
+
+ GF_VALIDATE_OR_GOTO("dht", frame, err);
+ GF_VALIDATE_OR_GOTO("dht", this, err);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, err);
+ GF_VALIDATE_OR_GOTO("dht", this->private, err);
+
+ local = frame->local;
+ prev = cookie;
+
+ layout = local->selfheal.refreshed_layout;
+
+ LOCK(&frame->lock);
+ {
+ op_ret = dht_layout_merge(this, layout, prev, op_ret, op_errno, xattr);
- local->op_ret = 0;
+ dht_iatt_merge(this, &local->stbuf, stbuf);
+
+ if (op_ret == -1) {
+ gf_uuid_unparse(local->loc.gfid, gfid);
+ local->op_errno = op_errno;
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_FILE_LOOKUP_FAILED, "path=%s", local->loc.path,
+ "name=%s", prev->name, "gfid=%s", gfid, NULL);
+
+ goto unlock;
}
-unlock:
- UNLOCK (&frame->lock);
- this_call_cnt = dht_frame_return (frame);
+ local->op_ret = 0;
+ }
+unlock:
+ UNLOCK(&frame->lock);
- if (is_last_call (this_call_cnt)) {
- if (local->op_ret == 0) {
- dht_refresh_layout_done (frame);
- } else {
- goto err;
- }
+ this_call_cnt = dht_frame_return(frame);
+ if (is_last_call(this_call_cnt)) {
+ if (local->op_ret == 0) {
+ local->refresh_layout_done(frame);
+ } else {
+ goto err;
}
+ }
- return 0;
+ return 0;
err:
- dht_selfheal_dir_finish (frame, this, -1);
- return 0;
+ if (local) {
+ local->refresh_layout_unlock(frame, this, -1, 1);
+ }
+ return 0;
}
int
-dht_refresh_layout (call_frame_t *frame)
+dht_refresh_layout(call_frame_t *frame)
{
- int call_cnt = 0;
- int i = 0, ret = -1;
- dht_conf_t *conf = NULL;
- dht_local_t *local = NULL;
- xlator_t *this = NULL;
-
- GF_VALIDATE_OR_GOTO ("dht", frame, out);
- GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
-
- this = frame->this;
- conf = this->private;
- local = frame->local;
-
- call_cnt = conf->subvolume_cnt;
- local->call_cnt = call_cnt;
- local->op_ret = -1;
-
- if (local->selfheal.refreshed_layout) {
- dht_layout_unref (this, local->selfheal.refreshed_layout);
- local->selfheal.refreshed_layout = NULL;
- }
-
- local->selfheal.refreshed_layout = dht_layout_new (this,
- conf->subvolume_cnt);
- if (!local->selfheal.refreshed_layout) {
- goto out;
- }
-
- if (local->xattr != NULL) {
- dict_del (local->xattr, conf->xattr_name);
- }
-
+ int call_cnt = 0;
+ int i = 0, ret = -1;
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *this = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {
+ 0,
+ };
+
+ GF_VALIDATE_OR_GOTO("dht", frame, out);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, out);
+
+ this = frame->this;
+ conf = this->private;
+ local = frame->local;
+
+ call_cnt = conf->subvolume_cnt;
+ local->call_cnt = call_cnt;
+ local->op_ret = -1;
+
+ if (local->selfheal.refreshed_layout) {
+ dht_layout_unref(this, local->selfheal.refreshed_layout);
+ local->selfheal.refreshed_layout = NULL;
+ }
+
+ local->selfheal.refreshed_layout = dht_layout_new(this,
+ conf->subvolume_cnt);
+ if (!local->selfheal.refreshed_layout) {
+ gf_uuid_unparse(local->loc.gfid, gfid);
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_MEM_ALLOC_FAILED,
+ "path=%s", local->loc.path, "gfid=%s", gfid, NULL);
+ goto out;
+ }
+
+ if (local->xattr != NULL) {
+ dict_del(local->xattr, conf->xattr_name);
+ }
+
+ if (local->xattr_req == NULL) {
+ gf_uuid_unparse(local->loc.gfid, gfid);
+ local->xattr_req = dict_new();
if (local->xattr_req == NULL) {
- local->xattr_req = dict_new ();
- if (local->xattr_req == NULL) {
- goto out;
- }
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
+ "path=%s", local->loc.path, "gfid=%s", gfid, NULL);
+ goto out;
}
+ }
- if (dict_get (local->xattr_req, conf->xattr_name) == 0) {
- ret = dict_set_uint32 (local->xattr_req, conf->xattr_name,
- 4 * 4);
- if (ret)
- gf_msg (this->name, GF_LOG_WARNING, 0,
- DHT_MSG_DICT_SET_FAILED,
- "%s: Failed to set dictionary value:key = %s",
- local->loc.path, conf->xattr_name);
- }
+ if (dict_get(local->xattr_req, conf->xattr_name) == 0) {
+ ret = dict_set_uint32(local->xattr_req, conf->xattr_name, 4 * 4);
+ if (ret)
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", local->loc.path, "key=%s", conf->xattr_name,
+ NULL);
+ }
- for (i = 0; i < call_cnt; i++) {
- STACK_WIND (frame, dht_refresh_layout_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->lookup,
- &local->loc, local->xattr_req);
- }
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND_COOKIE(frame, dht_refresh_layout_cbk, conf->subvolumes[i],
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup, &local->loc,
+ local->xattr_req);
+ }
- return 0;
+ return 0;
out:
- dht_selfheal_dir_finish (frame, this, -1);
- return 0;
+ if (local) {
+ local->refresh_layout_unlock(frame, this, -1, 1);
+ }
+ return 0;
}
-
int32_t
-dht_selfheal_layout_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+dht_selfheal_layout_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- if (op_ret < 0) {
- goto err;
- }
-
- dht_refresh_layout (frame);
- return 0;
-
-err:
- dht_selfheal_dir_finish (frame, this, -1);
- return 0;
-}
+ dht_local_t *local = NULL;
+ local = frame->local;
-gf_boolean_t
-dht_should_heal_layout (call_frame_t *frame, dht_layout_t **heal,
- dht_layout_t **ondisk)
-{
- gf_boolean_t fixit = _gf_true;
- dht_local_t *local = NULL;
- int ret = -1, heal_missing_dirs = 0;
+ if (!local) {
+ goto err;
+ }
- local = frame->local;
+ if (op_ret < 0) {
+ local->op_errno = op_errno;
+ goto err;
+ }
- if ((heal == NULL) || (*heal == NULL) || (ondisk == NULL)
- || (*ondisk == NULL))
- goto out;
+ local->refresh_layout_unlock = dht_selfheal_dir_finish;
+ local->refresh_layout_done = dht_refresh_layout_done;
- ret = dht_layout_anomalies (frame->this, &local->loc, *ondisk,
- &local->selfheal.hole_cnt,
- &local->selfheal.overlaps_cnt,
- NULL, &local->selfheal.down,
- &local->selfheal.misc, NULL);
+ dht_refresh_layout(frame);
+ return 0;
- if (ret < 0)
- goto out;
+err:
+ dht_selfheal_dir_finish(frame, this, -1, 1);
+ return 0;
+}
- /* Directories might've been created as part of this self-heal. We've to
- * sync non-layout xattrs and set range 0-0 on new directories
+gf_boolean_t
+dht_should_heal_layout(call_frame_t *frame, dht_layout_t **heal,
+ dht_layout_t **ondisk)
+{
+ gf_boolean_t fixit = _gf_true;
+ dht_local_t *local = NULL;
+ int heal_missing_dirs = 0;
+
+ local = frame->local;
+
+ if ((heal == NULL) || (*heal == NULL) || (ondisk == NULL) ||
+ (*ondisk == NULL))
+ goto out;
+
+ dht_layout_anomalies(
+ frame->this, &local->loc, *ondisk, &local->selfheal.hole_cnt,
+ &local->selfheal.overlaps_cnt, &local->selfheal.missing_cnt,
+ &local->selfheal.down, &local->selfheal.misc, NULL);
+
+ /* Directories might've been created as part of this self-heal. We've to
+ * sync non-layout xattrs and set range 0-0 on new directories
+ */
+ heal_missing_dirs = local->selfheal.force_mkdir
+ ? local->selfheal.force_mkdir
+ : dht_layout_missing_dirs(*heal);
+
+ if ((local->selfheal.hole_cnt == 0) &&
+ (local->selfheal.overlaps_cnt == 0) && heal_missing_dirs) {
+ dht_layout_t *tmp = NULL;
+
+ /* Just added a brick and need to set 0-0 range on this brick.
+ * But ondisk layout is well-formed. So, swap layouts "heal" and
+ * "ondisk". Now "ondisk" layout will be used for healing
+ * xattrs. If there are any non-participating subvols in
+ * "ondisk" layout, dht_selfheal_dir_xattr_persubvol will set
+ * 0-0 and non-layout xattrs. This way we won't end up in
+ * "corrupting" already set and well-formed "ondisk" layout.
*/
- heal_missing_dirs = local->selfheal.force_mkdir
- ? local->selfheal.force_mkdir : dht_layout_missing_dirs (*heal);
-
- if ((local->selfheal.hole_cnt == 0)
- && (local->selfheal.overlaps_cnt == 0) && heal_missing_dirs) {
- dht_layout_t *tmp = NULL;
-
- /* Just added a brick and need to set 0-0 range on this brick.
- * But ondisk layout is well-formed. So, swap layouts "heal" and
- * "ondisk". Now "ondisk" layout will be used for healing
- * xattrs. If there are any non-participating subvols in
- * "ondisk" layout, dht_selfheal_dir_xattr_persubvol will set
- * 0-0 and non-layout xattrs. This way we won't end up in
- * "corrupting" already set and well-formed "ondisk" layout.
- */
- tmp = *heal;
- *heal = *ondisk;
- *ondisk = tmp;
-
- /* Current selfheal code, heals non-layout xattrs only after
- * an add-brick. In fact non-layout xattrs are considered as
- * secondary citizens which are healed only if layout xattrs
- * need to be healed. This is wrong, since for eg., quota can be
- * set when layout is well-formed, but a node is down. Also,
- * just for healing non-layout xattrs, we don't need locking.
- * This issue is _NOT FIXED_ by this patch.
- */
- }
+ tmp = *heal;
+ *heal = *ondisk;
+ *ondisk = tmp;
+
+ /* Current selfheal code, heals non-layout xattrs only after
+ * an add-brick. In fact non-layout xattrs are considered as
+ * secondary citizens which are healed only if layout xattrs
+ * need to be healed. This is wrong, since for eg., quota can be
+ * set when layout is well-formed, but a node is down. Also,
+ * just for healing non-layout xattrs, we don't need locking.
+ * This issue is _NOT FIXED_ by this patch.
+ */
+ }
- fixit = (local->selfheal.hole_cnt || local->selfheal.overlaps_cnt
- || heal_missing_dirs);
+ fixit = (local->selfheal.hole_cnt || local->selfheal.overlaps_cnt ||
+ heal_missing_dirs);
out:
- return fixit;
+ return fixit;
}
-inline int
-dht_layout_span (dht_layout_t *layout)
+int
+dht_layout_span(dht_layout_t *layout)
{
- int i = 0, count = 0;
+ int i = 0, count = 0;
- for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].err)
- continue;
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].err)
+ continue;
- if (layout->list[i].start != layout->list[i].stop)
- count++;
- }
+ if (layout->list[i].start != layout->list[i].stop)
+ count++;
+ }
- return count;
+ return count;
}
int
-dht_decommissioned_bricks_in_layout (xlator_t *this, dht_layout_t *layout)
+dht_decommissioned_bricks_in_layout(xlator_t *this, dht_layout_t *layout)
{
- dht_conf_t *conf = NULL;
- int count = 0, i = 0, j = 0;
+ dht_conf_t *conf = NULL;
+ int count = 0, i = 0, j = 0;
- if ((this == NULL) || (layout == NULL))
- goto out;
+ if ((this == NULL) || (layout == NULL))
+ goto out;
- conf = this->private;
+ conf = this->private;
- for (i = 0; i < layout->cnt; i++) {
- for (j = 0; j < conf->subvolume_cnt; j++) {
- if (conf->decommissioned_bricks[j] &&
- conf->decommissioned_bricks[j]
- == layout->list[i].xlator) {
- count++;
- }
- }
+ for (i = 0; i < layout->cnt; i++) {
+ for (j = 0; j < conf->subvolume_cnt; j++) {
+ if (conf->decommissioned_bricks[j] &&
+ conf->decommissioned_bricks[j] == layout->list[i].xlator) {
+ count++;
+ }
}
+ }
out:
- return count;
+ return count;
}
dht_distribution_type_t
-dht_distribution_type (xlator_t *this, dht_layout_t *layout)
+dht_distribution_type(xlator_t *this, dht_layout_t *layout)
{
- dht_distribution_type_t type = GF_DHT_EQUAL_DISTRIBUTION;
- int i = 0;
- uint32_t start_range = 0, range = 0, diff = 0;
+ dht_distribution_type_t type = GF_DHT_EQUAL_DISTRIBUTION;
+ int i = 0;
+ uint32_t start_range = 0, range = 0, diff = 0;
- if ((this == NULL) || (layout == NULL) || (layout->cnt < 1)) {
- goto out;
- }
+ if ((this == NULL) || (layout == NULL) || (layout->cnt < 1)) {
+ goto out;
+ }
- for (i = 0; i < layout->cnt; i++) {
- if (start_range == 0) {
- start_range = layout->list[i].stop
- - layout->list[i].start;
- continue;
- }
+ for (i = 0; i < layout->cnt; i++) {
+ if (start_range == 0) {
+ start_range = layout->list[i].stop - layout->list[i].start;
+ continue;
+ }
- range = layout->list[i].stop - layout->list[i].start;
- diff = abs (range - start_range);
+ range = layout->list[i].stop - layout->list[i].start;
+ diff = (range >= start_range) ? range - start_range
+ : start_range - range;
- if ((range != 0) && (diff > layout->cnt)) {
- type = GF_DHT_WEIGHTED_DISTRIBUTION;
- break;
- }
+ if ((range != 0) && (diff > layout->cnt)) {
+ type = GF_DHT_WEIGHTED_DISTRIBUTION;
+ break;
}
+ }
out:
- return type;
+ return type;
}
gf_boolean_t
-dht_should_fix_layout (call_frame_t *frame, dht_layout_t **inmem,
- dht_layout_t **ondisk)
+dht_should_fix_layout(call_frame_t *frame, dht_layout_t **inmem,
+ dht_layout_t **ondisk)
{
- gf_boolean_t fixit = _gf_true;
- dht_local_t *local = NULL;
- int layout_span = 0;
- int decommissioned_bricks = 0;
- int ret = 0;
- dht_conf_t *conf = NULL;
- dht_distribution_type_t inmem_dist_type = 0;
- dht_distribution_type_t ondisk_dist_type = 0;
-
- conf = frame->this->private;
-
- local = frame->local;
-
- if ((inmem == NULL) || (*inmem == NULL) || (ondisk == NULL)
- || (*ondisk == NULL))
- goto out;
-
- ret = dht_layout_anomalies (frame->this, &local->loc, *ondisk,
- &local->selfheal.hole_cnt,
- &local->selfheal.overlaps_cnt, NULL,
- &local->selfheal.down,
- &local->selfheal.misc, NULL);
- if (ret < 0) {
- fixit = _gf_false;
- goto out;
- }
+ gf_boolean_t fixit = _gf_true;
- if (local->selfheal.down || local->selfheal.misc) {
- fixit = _gf_false;
- goto out;
- }
+ dht_local_t *local = NULL;
+ int layout_span = 0;
+ int decommissioned_bricks = 0;
+ dht_conf_t *conf = NULL;
+ dht_distribution_type_t inmem_dist_type = 0;
+ dht_distribution_type_t ondisk_dist_type = 0;
- if (local->selfheal.hole_cnt || local->selfheal.overlaps_cnt)
- goto out;
+ conf = frame->this->private;
- layout_span = dht_layout_span (*ondisk);
+ local = frame->local;
- decommissioned_bricks
- = dht_decommissioned_bricks_in_layout (frame->this,
- *ondisk);
- inmem_dist_type = dht_distribution_type (frame->this, *inmem);
- ondisk_dist_type = dht_distribution_type (frame->this, *ondisk);
+ if ((inmem == NULL) || (*inmem == NULL) || (ondisk == NULL) ||
+ (*ondisk == NULL))
+ goto out;
- if ((decommissioned_bricks == 0)
- && (layout_span == (conf->subvolume_cnt
- - conf->decommission_subvols_cnt))
- && (inmem_dist_type == ondisk_dist_type))
- fixit = _gf_false;
+ dht_layout_anomalies(frame->this, &local->loc, *ondisk,
+ &local->selfheal.hole_cnt,
+ &local->selfheal.overlaps_cnt, NULL,
+ &local->selfheal.down, &local->selfheal.misc, NULL);
-out:
- return fixit;
-}
+ if (local->selfheal.down || local->selfheal.misc) {
+ fixit = _gf_false;
+ goto out;
+ }
-int
-dht_selfheal_layout_lock (call_frame_t *frame, dht_layout_t *layout,
- gf_boolean_t newdir,
- dht_selfheal_layout_t healer,
- dht_need_heal_t should_heal)
-{
- dht_local_t *local = NULL;
- int count = 1, ret = -1, i = 0;
- dht_lock_t **lk_array = NULL;
- dht_conf_t *conf = NULL;
- dht_layout_t *tmp = NULL;
-
- GF_VALIDATE_OR_GOTO ("dht", frame, err);
- GF_VALIDATE_OR_GOTO (frame->this->name, frame->local, err);
-
- local = frame->local;
-
- conf = frame->this->private;
+ if (local->selfheal.hole_cnt || local->selfheal.overlaps_cnt)
+ goto out;
- local->selfheal.healer = healer;
- local->selfheal.should_heal = should_heal;
+ /* If commit hashes are being updated, let it through */
+ if ((*inmem)->commit_hash != (*ondisk)->commit_hash)
+ goto out;
- tmp = local->selfheal.layout;
- local->selfheal.layout = dht_layout_ref (frame->this, layout);
- dht_layout_unref (frame->this, tmp);
+ layout_span = dht_layout_span(*ondisk);
- if (!newdir) {
- count = conf->subvolume_cnt;
+ decommissioned_bricks = dht_decommissioned_bricks_in_layout(frame->this,
+ *ondisk);
+ inmem_dist_type = dht_distribution_type(frame->this, *inmem);
+ ondisk_dist_type = dht_distribution_type(frame->this, *ondisk);
- lk_array = GF_CALLOC (count, sizeof (*lk_array),
- gf_common_mt_char);
- if (lk_array == NULL)
- goto err;
+ if ((decommissioned_bricks == 0) &&
+ (layout_span ==
+ (conf->subvolume_cnt - conf->decommission_subvols_cnt)) &&
+ (inmem_dist_type == ondisk_dist_type))
+ fixit = _gf_false;
- for (i = 0; i < count; i++) {
- lk_array[i] = dht_lock_new (frame->this,
- conf->subvolumes[i],
- &local->loc, F_WRLCK,
- DHT_LAYOUT_HEAL_DOMAIN);
- if (lk_array[i] == NULL)
- goto err;
- }
- } else {
- count = 1;
- lk_array = GF_CALLOC (count, sizeof (*lk_array),
- gf_common_mt_char);
- if (lk_array == NULL)
- goto err;
-
- lk_array[0] = dht_lock_new (frame->this, local->hashed_subvol,
- &local->loc, F_WRLCK,
- DHT_LAYOUT_HEAL_DOMAIN);
- if (lk_array[0] == NULL)
- goto err;
- }
+out:
- local->lock.locks = lk_array;
- local->lock.lk_count = count;
+ return fixit;
+}
- ret = dht_blocking_inodelk (frame, lk_array, count,
- dht_selfheal_layout_lock_cbk);
- if (ret < 0) {
- local->lock.locks = NULL;
- local->lock.lk_count = 0;
+static int
+dht_selfheal_layout_lock(call_frame_t *frame, dht_layout_t *layout,
+ gf_boolean_t newdir, dht_selfheal_layout_t healer,
+ dht_need_heal_t should_heal)
+{
+ dht_local_t *local = NULL;
+ int count = 1, ret = -1, i = 0;
+ dht_lock_t **lk_array = NULL;
+ dht_conf_t *conf = NULL;
+ dht_layout_t *tmp = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ GF_VALIDATE_OR_GOTO("dht", frame, err);
+ GF_VALIDATE_OR_GOTO(frame->this->name, frame->local, err);
+
+ local = frame->local;
+
+ conf = frame->this->private;
+
+ local->selfheal.healer = healer;
+ local->selfheal.should_heal = should_heal;
+
+ tmp = local->selfheal.layout;
+ local->selfheal.layout = dht_layout_ref(frame->this, layout);
+ dht_layout_unref(frame->this, tmp);
+
+ if (!newdir) {
+ count = conf->subvolume_cnt;
+
+ lk_array = GF_CALLOC(count, sizeof(*lk_array), gf_common_mt_char);
+ if (lk_array == NULL) {
+ gf_uuid_unparse(local->stbuf.ia_gfid, gfid);
+ gf_smsg("dht", GF_LOG_ERROR, ENOMEM, DHT_MSG_MEM_ALLOC_FAILED,
+ "lk_array-gfid=%s", gfid, "path=%s", local->loc.path, NULL);
+ goto err;
+ }
+
+ for (i = 0; i < count; i++) {
+ lk_array[i] = dht_lock_new(
+ frame->this, conf->subvolumes[i], &local->loc, F_WRLCK,
+ DHT_LAYOUT_HEAL_DOMAIN, NULL, FAIL_ON_ANY_ERROR);
+ if (lk_array[i] == NULL) {
+ gf_uuid_unparse(local->stbuf.ia_gfid, gfid);
+ gf_smsg(THIS->name, GF_LOG_ERROR, ENOMEM,
+ DHT_MSG_MEM_ALLOC_FAILED, "lk_array-gfid=%s", gfid,
+ "path=%s", local->loc.path, NULL);
goto err;
- }
-
- return 0;
+ }
+ }
+ } else {
+ count = 1;
+ lk_array = GF_CALLOC(count, sizeof(*lk_array), gf_common_mt_char);
+ if (lk_array == NULL) {
+ gf_uuid_unparse(local->stbuf.ia_gfid, gfid);
+ gf_smsg(THIS->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_MEM_ALLOC_FAILED,
+ "lk_array-gfid=%s", gfid, "path=%s", local->loc.path, NULL);
+ goto err;
+ }
+
+ lk_array[0] = dht_lock_new(frame->this, local->hashed_subvol,
+ &local->loc, F_WRLCK, DHT_LAYOUT_HEAL_DOMAIN,
+ NULL, FAIL_ON_ANY_ERROR);
+ if (lk_array[0] == NULL) {
+ gf_uuid_unparse(local->stbuf.ia_gfid, gfid);
+ gf_smsg(THIS->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_MEM_ALLOC_FAILED,
+ "lk_array-gfid=%s", gfid, "path=%s", local->loc.path, NULL);
+ goto err;
+ }
+ }
+
+ local->lock[0].layout.my_layout.locks = lk_array;
+ local->lock[0].layout.my_layout.lk_count = count;
+
+ ret = dht_blocking_inodelk(frame, lk_array, count,
+ dht_selfheal_layout_lock_cbk);
+ if (ret < 0) {
+ local->lock[0].layout.my_layout.locks = NULL;
+ local->lock[0].layout.my_layout.lk_count = 0;
+ goto err;
+ }
+
+ return 0;
err:
- if (lk_array != NULL) {
- int tmp_count = 0, i = 0;
+ if (lk_array != NULL) {
+ dht_lock_array_free(lk_array, count);
+ GF_FREE(lk_array);
+ }
- for (i = 0; (i < count) && (lk_array[i]); i++, tmp_count++) {
- ;
- }
-
- dht_lock_array_free (lk_array, tmp_count);
- GF_FREE (lk_array);
- }
-
- return -1;
+ return -1;
}
-int
-dht_selfheal_dir_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xdata)
+static int
+dht_selfheal_dir_xattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- xlator_t *subvol = NULL;
- int i = 0;
- dht_layout_t *layout = NULL;
- int err = 0;
- int this_call_cnt = 0;
-
- local = frame->local;
- layout = local->selfheal.layout;
- prev = cookie;
- subvol = prev->this;
-
- if (op_ret == 0)
- err = 0;
- else
- err = op_errno;
-
- for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].xlator == subvol) {
- layout->list[i].err = err;
- break;
- }
- }
-
- this_call_cnt = dht_frame_return (frame);
-
- if (is_last_call (this_call_cnt)) {
- dht_selfheal_dir_finish (frame, this, 0);
- }
-
- return 0;
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ struct iatt *stbuf = NULL;
+ int i = 0;
+ int ret = 0;
+ dht_layout_t *layout = NULL;
+ int err = 0;
+ int this_call_cnt = 0;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ local = frame->local;
+ layout = local->selfheal.layout;
+ subvol = cookie;
+
+ if (op_ret == 0) {
+ err = 0;
+ } else {
+ gf_uuid_unparse(local->loc.gfid, gfid);
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_DIR_SELFHEAL_XATTR_FAILED, "name=%s", subvol->name,
+ "path=%s", local->loc.path, "gfid=%s", gfid, NULL);
+ err = op_errno;
+ }
+
+ ret = dict_get_bin(xdata, DHT_IATT_IN_XDATA_KEY, (void **)&stbuf);
+ if (ret < 0) {
+ gf_uuid_unparse(local->loc.gfid, gfid);
+ gf_msg_debug(this->name, 0,
+ "key = %s not present in dict"
+ ", path:%s gfid:%s",
+ DHT_IATT_IN_XDATA_KEY, local->loc.path, gfid);
+ }
+
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].xlator == subvol) {
+ layout->list[i].err = err;
+ break;
+ }
+ }
+
+ LOCK(&frame->lock);
+ {
+ dht_iatt_merge(this, &local->stbuf, stbuf);
+ }
+ UNLOCK(&frame->lock);
+
+ this_call_cnt = dht_frame_return(frame);
+
+ if (is_last_call(this_call_cnt)) {
+ dht_selfheal_dir_finish(frame, this, 0, 1);
+ }
+
+ return 0;
}
-
+/* Code is required to set user xattr to local->xattr
+ */
int
-dht_selfheal_dir_xattr_persubvol (call_frame_t *frame, loc_t *loc,
- dht_layout_t *layout, int i,
- xlator_t *req_subvol)
+dht_set_user_xattr(dict_t *dict, char *k, data_t *v, void *data)
{
- xlator_t *subvol = NULL;
- dict_t *xattr = NULL;
- int ret = 0;
- xlator_t *this = NULL;
- int32_t *disk_layout = NULL;
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- data_t *data = NULL;
- char gfid[GF_UUID_BUF_SIZE] = {0};
-
- local = frame->local;
- if (req_subvol)
- subvol = req_subvol;
- else
- subvol = layout->list[i].xlator;
- this = frame->this;
-
- GF_VALIDATE_OR_GOTO ("", this, err);
- GF_VALIDATE_OR_GOTO (this->name, layout, err);
- GF_VALIDATE_OR_GOTO (this->name, local, err);
- GF_VALIDATE_OR_GOTO (this->name, subvol, err);
- VALIDATE_OR_GOTO (this->private, err);
-
- conf = this->private;
-
- xattr = get_new_dict ();
- if (!xattr) {
- goto err;
- }
-
- gf_uuid_unparse(loc->inode->gfid, gfid);
-
- ret = dht_disk_layout_extract (this, layout, i, &disk_layout);
- if (ret == -1) {
- gf_msg (this->name, GF_LOG_WARNING, 0,
- DHT_MSG_DIR_SELFHEAL_XATTR_FAILED,
- "Directory self heal xattr failed:"
- " %s: (subvol %s) Failed to extract disk layout,"
- " gfid = %s", loc->path, subvol->name, gfid);
- goto err;
- }
-
- ret = dict_set_bin (xattr, conf->xattr_name, disk_layout, 4 * 4);
- if (ret == -1) {
- gf_msg (this->name, GF_LOG_WARNING, 0,
- DHT_MSG_DIR_SELFHEAL_XATTR_FAILED,
- "Directory self heal xattr failed:"
- "%s: (subvol %s) Failed to set xattr dictionary,"
- " gfid = %s", loc->path, subvol->name, gfid);
- goto err;
- }
- disk_layout = NULL;
-
- gf_msg_trace (this->name, 0,
- "setting hash range %u - %u (type %d) on subvolume %s"
- " for %s", layout->list[i].start, layout->list[i].stop,
- layout->type, subvol->name, loc->path);
-
- dict_ref (xattr);
- if (local->xattr) {
- data = dict_get (local->xattr, QUOTA_LIMIT_KEY);
- if (data) {
- ret = dict_add (xattr, QUOTA_LIMIT_KEY, data);
- if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_DICT_SET_FAILED,
- "%s: Failed to set dictionary value:"
- " key = %s",
- loc->path, QUOTA_LIMIT_KEY);
- }
- }
- }
-
- if (!gf_uuid_is_null (local->gfid))
- gf_uuid_copy (loc->gfid, local->gfid);
+ dict_t *set_xattr = data;
+ int ret = -1;
- STACK_WIND (frame, dht_selfheal_dir_xattr_cbk,
- subvol, subvol->fops->setxattr,
- loc, xattr, 0, NULL);
-
- dict_unref (xattr);
+ ret = dict_set(set_xattr, k, v);
+ return ret;
+}
- return 0;
+static int
+dht_selfheal_dir_xattr_persubvol(call_frame_t *frame, loc_t *loc,
+ dht_layout_t *layout, int i,
+ xlator_t *req_subvol)
+{
+ xlator_t *subvol = NULL;
+ dict_t *xattr = NULL;
+ dict_t *xdata = NULL;
+ int ret = 0;
+ xlator_t *this = NULL;
+ int32_t *disk_layout = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ data_t *data = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ local = frame->local;
+ if (req_subvol)
+ subvol = req_subvol;
+ else
+ subvol = layout->list[i].xlator;
+ this = frame->this;
+
+ GF_VALIDATE_OR_GOTO("", this, err);
+ GF_VALIDATE_OR_GOTO(this->name, layout, err);
+ GF_VALIDATE_OR_GOTO(this->name, local, err);
+ GF_VALIDATE_OR_GOTO(this->name, subvol, err);
+ VALIDATE_OR_GOTO(this->private, err);
+
+ conf = this->private;
+
+ xattr = dict_new();
+ if (!xattr) {
+ goto err;
+ }
+
+ xdata = dict_new();
+ if (!xdata)
+ goto err;
+
+ ret = dict_set_str(xdata, GLUSTERFS_INTERNAL_FOP_KEY, "yes");
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", loc->path, "key=%s", GLUSTERFS_INTERNAL_FOP_KEY,
+ "gfid=%s", gfid, NULL);
+ goto err;
+ }
+
+ ret = dict_set_int8(xdata, DHT_IATT_IN_XDATA_KEY, 1);
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", loc->path, "key=%s", DHT_IATT_IN_XDATA_KEY,
+ "gfid=%s", gfid, NULL);
+ goto err;
+ }
+
+ gf_uuid_unparse(loc->inode->gfid, gfid);
+
+ ret = dht_disk_layout_extract(this, layout, i, &disk_layout);
+ if (ret == -1) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_DIR_SELFHEAL_XATTR_FAILED,
+ "extract-disk-layout-failed, path=%s", loc->path, "subvol=%s",
+ subvol->name, "gfid=%s", gfid, NULL);
+ goto err;
+ }
+
+ ret = dict_set_bin(xattr, conf->xattr_name, disk_layout, 4 * 4);
+ if (ret == -1) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_DIR_SELFHEAL_XATTR_FAILED, "path=%s", loc->path,
+ "subvol=%s", subvol->name,
+ "set-xattr-dictionary-failed"
+ "gfid=%s",
+ gfid, NULL);
+ goto err;
+ }
+ disk_layout = NULL;
+
+ gf_msg_trace(this->name, 0,
+ "setting hash range 0x%x - 0x%x (type %d) on subvolume %s"
+ " for %s",
+ layout->list[i].start, layout->list[i].stop, layout->type,
+ subvol->name, loc->path);
+
+ if (local->xattr) {
+ data = dict_get(local->xattr, QUOTA_LIMIT_KEY);
+ if (data) {
+ ret = dict_add(xattr, QUOTA_LIMIT_KEY, data);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", loc->path, "key=%s", QUOTA_LIMIT_KEY, NULL);
+ }
+ }
+ data = dict_get(local->xattr, QUOTA_LIMIT_OBJECTS_KEY);
+ if (data) {
+ ret = dict_add(xattr, QUOTA_LIMIT_OBJECTS_KEY, data);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", loc->path, "key=%s", QUOTA_LIMIT_OBJECTS_KEY,
+ NULL);
+ }
+ }
+ }
+
+ if (!gf_uuid_is_null(local->gfid))
+ gf_uuid_copy(loc->gfid, local->gfid);
+
+ STACK_WIND_COOKIE(frame, dht_selfheal_dir_xattr_cbk, (void *)subvol, subvol,
+ subvol->fops->setxattr, loc, xattr, 0, xdata);
+
+ dict_unref(xattr);
+ dict_unref(xdata);
+
+ return 0;
err:
- if (xattr)
- dict_destroy (xattr);
+ if (xattr)
+ dict_unref(xattr);
+ if (xdata)
+ dict_unref(xdata);
- GF_FREE (disk_layout);
+ GF_FREE(disk_layout);
- dht_selfheal_dir_xattr_cbk (frame, subvol, frame->this,
- -1, ENOMEM, NULL);
- return 0;
+ dht_selfheal_dir_xattr_cbk(frame, (void *)subvol, frame->this, -1, ENOMEM,
+ NULL);
+ return 0;
}
-int
-dht_fix_dir_xattr (call_frame_t *frame, loc_t *loc, dht_layout_t *layout)
+static int
+dht_fix_dir_xattr(call_frame_t *frame, loc_t *loc, dht_layout_t *layout)
{
- dht_local_t *local = NULL;
- int i = 0;
- int count = 0;
- xlator_t *this = NULL;
- dht_conf_t *conf = NULL;
- dht_layout_t *dummy = NULL;
-
- local = frame->local;
- this = frame->this;
- conf = this->private;
+ dht_local_t *local = NULL;
+ int i = 0;
+ int count = 0;
+ xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+ dht_layout_t *dummy = NULL;
+
+ local = frame->local;
+ this = frame->this;
+ conf = this->private;
+
+ gf_msg_debug(this->name, 0, "%s: Writing the new range for all subvolumes",
+ loc->path);
+
+ local->call_cnt = count = conf->subvolume_cnt;
+
+ if (gf_log_get_loglevel() >= GF_LOG_DEBUG)
+ dht_log_new_layout_for_dir_selfheal(this, loc, layout);
+
+ for (i = 0; i < layout->cnt; i++) {
+ dht_selfheal_dir_xattr_persubvol(frame, loc, layout, i, NULL);
+
+ if (--count == 0)
+ goto out;
+ }
+ /* if we are here, subvolcount > layout_count. subvols-per-directory
+ * option might be set here. We need to clear out layout from the
+ * non-participating subvolumes, else it will result in overlaps */
+ dummy = dht_layout_new(this, 1);
+ if (!dummy)
+ goto out;
+ dummy->commit_hash = layout->commit_hash;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (_gf_false == dht_is_subvol_in_layout(layout, conf->subvolumes[i])) {
+ dht_selfheal_dir_xattr_persubvol(frame, loc, dummy, 0,
+ conf->subvolumes[i]);
+ if (--count == 0)
+ break;
+ }
+ }
+
+ dht_layout_unref(this, dummy);
+out:
+ return 0;
+}
- gf_msg_debug (this->name, 0,
- "%s: Writing the new range for all subvolumes",
- loc->path);
+static int
+dht_selfheal_dir_xattr(call_frame_t *frame, loc_t *loc, dht_layout_t *layout)
+{
+ dht_local_t *local = NULL;
+ int missing_xattr = 0;
+ int i = 0;
+ xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+ dht_layout_t *dummy = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {
+ 0,
+ };
+
+ local = frame->local;
+ this = frame->this;
+ conf = this->private;
+
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].err != -1 || !layout->list[i].stop) {
+ /* err != -1 would mean xattr present on the directory
+ * or the directory is non existent.
+ * !layout->list[i].stop would mean layout absent
+ */
+
+ continue;
+ }
+ missing_xattr++;
+ }
+ /* Also account for subvolumes with no-layout. Used for zero'ing out
+ * the layouts and for setting quota key's if present */
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (_gf_false == dht_is_subvol_in_layout(layout, conf->subvolumes[i])) {
+ missing_xattr++;
+ }
+ }
+ gf_msg_trace(this->name, 0, "%d subvolumes missing xattr for %s",
+ missing_xattr, loc->path);
+
+ if (missing_xattr == 0) {
+ dht_selfheal_dir_finish(frame, this, 0, 1);
+ return 0;
+ }
- local->call_cnt = count = conf->subvolume_cnt;
+ local->call_cnt = missing_xattr;
- dht_log_new_layout_for_dir_selfheal (this, loc, layout);
+ if (gf_log_get_loglevel() >= GF_LOG_DEBUG)
+ dht_log_new_layout_for_dir_selfheal(this, loc, layout);
- for (i = 0; i < layout->cnt; i++) {
- dht_selfheal_dir_xattr_persubvol (frame, loc, layout, i, NULL);
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].err != -1 || !layout->list[i].stop)
+ continue;
- if (--count == 0)
- goto out;
- }
- /* if we are here, subvolcount > layout_count. subvols-per-directory
- * option might be set here. We need to clear out layout from the
- * non-participating subvolumes, else it will result in overlaps */
- dummy = dht_layout_new (this, 1);
- if (!dummy)
- goto out;
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (_gf_false ==
- dht_is_subvol_in_layout (layout, conf->subvolumes[i])) {
- dht_selfheal_dir_xattr_persubvol (frame, loc, dummy, 0,
- conf->subvolumes[i]);
- if (--count == 0)
- break;
- }
- }
+ dht_selfheal_dir_xattr_persubvol(frame, loc, layout, i, NULL);
- dht_layout_unref (this, dummy);
+ if (--missing_xattr == 0)
+ break;
+ }
+ dummy = dht_layout_new(this, 1);
+ if (!dummy) {
+ gf_uuid_unparse(loc->gfid, gfid);
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_DUMMY_ALLOC_FAILED,
+ "path=%s", loc->path, "gfid=%s", gfid, NULL);
+ goto out;
+ }
+ for (i = 0; i < conf->subvolume_cnt && missing_xattr; i++) {
+ if (_gf_false == dht_is_subvol_in_layout(layout, conf->subvolumes[i])) {
+ dht_selfheal_dir_xattr_persubvol(frame, loc, dummy, 0,
+ conf->subvolumes[i]);
+ missing_xattr--;
+ }
+ }
+
+ dht_layout_unref(this, dummy);
out:
- return 0;
+ return 0;
}
int
-dht_selfheal_dir_xattr (call_frame_t *frame, loc_t *loc, dht_layout_t *layout)
+dht_selfheal_dir_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
{
- dht_local_t *local = NULL;
- int missing_xattr = 0;
- int i = 0;
- xlator_t *this = NULL;
- dht_conf_t *conf = NULL;
- dht_layout_t *dummy = NULL;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int this_call_cnt = 0, ret = -1;
- local = frame->local;
- this = frame->this;
- conf = this->private;
+ local = frame->local;
+ layout = local->selfheal.layout;
- for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].err != -1 || !layout->list[i].stop) {
- /* err != -1 would mean xattr present on the directory
- * or the directory is non existent.
- * !layout->list[i].stop would mean layout absent
- */
+ this_call_cnt = dht_frame_return(frame);
- continue;
- }
- missing_xattr++;
- }
- /* Also account for subvolumes with no-layout. Used for zero'ing out
- * the layouts and for setting quota key's if present */
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (_gf_false ==
- dht_is_subvol_in_layout (layout, conf->subvolumes[i])) {
- missing_xattr++;
- }
- }
- gf_msg_trace (this->name, 0,
- "%d subvolumes missing xattr for %s",
- missing_xattr, loc->path);
+ if (is_last_call(this_call_cnt)) {
+ if (!local->heal_layout) {
+ gf_msg_trace(this->name, 0, "Skip heal layout for %s gfid = %s ",
+ local->loc.path, uuid_utoa(local->gfid));
- if (missing_xattr == 0) {
- dht_selfheal_dir_finish (frame, this, 0);
- return 0;
+ dht_selfheal_dir_finish(frame, this, 0, 1);
+ return 0;
}
+ ret = dht_selfheal_layout_lock(frame, layout, _gf_false,
+ dht_selfheal_dir_xattr,
+ dht_should_heal_layout);
- local->call_cnt = missing_xattr;
-
- dht_log_new_layout_for_dir_selfheal (this, loc, layout);
-
- for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].err != -1 || !layout->list[i].stop)
- continue;
-
- dht_selfheal_dir_xattr_persubvol (frame, loc, layout, i, NULL);
-
- if (--missing_xattr == 0)
- break;
- }
- dummy = dht_layout_new (this, 1);
- if (!dummy)
- goto out;
- for (i = 0; i < conf->subvolume_cnt && missing_xattr; i++) {
- if (_gf_false ==
- dht_is_subvol_in_layout (layout, conf->subvolumes[i])) {
- dht_selfheal_dir_xattr_persubvol (frame, loc, dummy, 0,
- conf->subvolumes[i]);
- missing_xattr--;
- }
+ if (ret < 0) {
+ dht_selfheal_dir_finish(frame, this, -1, 1);
}
+ }
- dht_layout_unref (this, dummy);
-out:
- return 0;
+ return 0;
}
-gf_boolean_t
-dht_is_subvol_part_of_layout (dht_layout_t *layout, xlator_t *xlator)
+int
+dht_selfheal_dir_setattr(call_frame_t *frame, loc_t *loc, struct iatt *stbuf,
+ int32_t valid, dht_layout_t *layout)
{
- int i = 0;
- gf_boolean_t ret = _gf_false;
-
- for (i = 0; i < layout->cnt; i++) {
- if (!strcmp (layout->list[i].xlator->name, xlator->name)) {
- ret = _gf_true;
- break;
+ int missing_attr = 0;
+ int i = 0, ret = -1;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ xlator_t *this = NULL;
+ int cnt = 0;
+
+ local = frame->local;
+ this = frame->this;
+ conf = this->private;
+
+ /* We need to heal the attrs if:
+ * 1. Any directories were missing - the newly created dirs will need
+ * to have the correct attrs set
+ * 2. An existing dir does not have the correct permissions -they may
+ * have been changed when a brick was down.
+ */
+
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].err == -1)
+ missing_attr++;
+ }
+
+ if ((missing_attr == 0) && (local->need_attrheal == 0)) {
+ if (!local->heal_layout) {
+ gf_msg_trace(this->name, 0, "Skip heal layout for %s gfid = %s ",
+ loc->path, uuid_utoa(loc->gfid));
+ dht_selfheal_dir_finish(frame, this, 0, 1);
+ return 0;
+ }
+ ret = dht_selfheal_layout_lock(frame, layout, _gf_false,
+ dht_selfheal_dir_xattr,
+ dht_should_heal_layout);
- }
+ if (ret < 0) {
+ dht_selfheal_dir_finish(frame, this, -1, 1);
}
- return ret;
-}
+ return 0;
+ }
-int
-dht_layout_index_from_conf (dht_layout_t *layout, xlator_t *xlator)
-{
- int i = -1;
- int j = 0;
+ cnt = local->call_cnt = conf->subvolume_cnt;
- for (j = 0; j < layout->cnt; j++) {
- if (!strcmp (layout->list[j].xlator->name, xlator->name)) {
- i = j;
- break;
- }
- }
+ for (i = 0; i < cnt; i++) {
+ STACK_WIND(frame, dht_selfheal_dir_setattr_cbk, layout->list[i].xlator,
+ layout->list[i].xlator->fops->setattr, loc, stbuf, valid,
+ NULL);
+ }
- return i;
+ return 0;
}
-
static int
-dht_selfheal_dir_xattr_for_nameless_lookup (call_frame_t *frame, loc_t *loc,
- dht_layout_t *layout)
+dht_selfheal_dir_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- dht_local_t *local = NULL;
- int missing_xattr = 0;
- int i = 0;
- xlator_t *this = NULL;
- dht_conf_t *conf = NULL;
- dht_layout_t *dummy = NULL;
- int j = 0;
-
- local = frame->local;
- this = frame->this;
- conf = this->private;
-
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ xlator_t *prev = NULL;
+ xlator_t *subvol = NULL;
+ int i = 0, ret = -1;
+ int this_call_cnt = 0;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ local = frame->local;
+ layout = local->selfheal.layout;
+ prev = cookie;
+ subvol = prev;
+
+ if ((op_ret == 0) || ((op_ret == -1) && (op_errno == EEXIST))) {
for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].err != -1 || !layout->list[i].stop) {
- /* err != -1 would mean xattr present on the directory
- or the directory is non existent.
- !layout->list[i].stop would mean layout absent
- */
+ if (layout->list[i].xlator == subvol) {
+ layout->list[i].err = -1;
+ break;
+ }
+ }
+ }
+
+ if (op_ret) {
+ gf_uuid_unparse(local->loc.gfid, gfid);
+ gf_smsg(this->name,
+ ((op_errno == EEXIST) ? GF_LOG_DEBUG : GF_LOG_WARNING),
+ op_errno, DHT_MSG_DIR_SELFHEAL_FAILED, "path=%s",
+ local->loc.path, "gfid=%s", gfid, NULL);
+ goto out;
+ }
+ dht_iatt_merge(this, &local->preparent, preparent);
+ dht_iatt_merge(this, &local->postparent, postparent);
+ ret = 0;
- continue;
- }
- missing_xattr++;
- }
+out:
+ this_call_cnt = dht_frame_return(frame);
- /* Also account for subvolumes with no-layout. Used for zero'ing out
- the layouts and for setting quota key's if present */
+ if (is_last_call(this_call_cnt)) {
+ dht_selfheal_dir_finish(frame, this, ret, 0);
+ dht_selfheal_dir_setattr(frame, &local->loc, &local->stbuf, 0xffffff,
+ layout);
+ }
- /* Send where either the subvol is not part of layout,
- * or it is part of the layout but error is non-zero but error
- * is not equal to -1 or ENOENT.
- */
+ return 0;
+}
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (dht_is_subvol_part_of_layout (layout, conf->subvolumes[i])
- == _gf_false) {
- missing_xattr++;
- continue;
- }
+static int
+dht_selfheal_dir_mkdir_lookup_done(call_frame_t *frame, xlator_t *this)
+{
+ dht_local_t *local = NULL;
+ int i = 0;
+ dict_t *dict = NULL;
+ dht_layout_t *layout = NULL;
+ loc_t *loc = NULL;
+ int cnt = 0;
+ int ret = -1;
+
+ VALIDATE_OR_GOTO(this->private, err);
+
+ local = frame->local;
+ layout = local->layout;
+ loc = &local->loc;
+
+ if (!gf_uuid_is_null(local->gfid)) {
+ dict = dict_new();
+ if (!dict)
+ return -1;
- j = dht_layout_index_from_conf (layout, conf->subvolumes[i]);
+ ret = dict_set_gfuuid(dict, "gfid-req", local->gfid, true);
+ if (ret)
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", loc->path, "key=gfid-req", NULL);
+ } else if (local->params) {
+ /* Send the dictionary from higher layers directly */
+
+ dict = dict_ref(local->params);
+ }
+ /* Code to update all extended attributed from local->xattr
+ to dict
+ */
+ dht_dir_set_heal_xattr(this, local, dict, local->xattr, NULL, NULL);
+
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_IS_NULL, NULL);
+ dict = dict_new();
+ if (!dict)
+ return -1;
+ }
+ ret = dict_set_flag(dict, GF_INTERNAL_CTX_KEY, GF_DHT_HEAL_DIR);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, "key=%s",
+ GF_INTERNAL_CTX_KEY, "path=%s", loc->path, NULL);
+ /* We can still continue. As heal can still happen
+ * unless quota limits have reached for the dir.
+ */
+ }
- if ((j != -1) && (layout->list[j].err != -1) &&
- (layout->list[j].err != 0) &&
- (layout->list[j].err != ENOENT)) {
- missing_xattr++;
- }
+ cnt = layout->cnt;
+ for (i = 0; i < cnt; i++) {
+ if (layout->list[i].err == ESTALE || layout->list[i].err == ENOENT ||
+ local->selfheal.force_mkdir) {
+ gf_msg_debug(this->name, 0, "Creating directory %s on subvol %s",
+ loc->path, layout->list[i].xlator->name);
+ STACK_WIND_COOKIE(
+ frame, dht_selfheal_dir_mkdir_cbk, layout->list[i].xlator,
+ layout->list[i].xlator, layout->list[i].xlator->fops->mkdir,
+ loc,
+ st_mode_from_ia(local->stbuf.ia_prot, local->stbuf.ia_type), 0,
+ dict);
}
+ }
+ if (dict)
+ dict_unref(dict);
- gf_log (this->name, GF_LOG_TRACE,
- "%d subvolumes missing xattr for %s",
- missing_xattr, loc->path);
+ return 0;
- if (missing_xattr == 0) {
- dht_selfheal_dir_finish (frame, this, 0);
- return 0;
- }
+err:
+ dht_selfheal_dir_finish(frame, this, -1, 1);
+ return 0;
+}
- local->call_cnt = missing_xattr;
+static int
+dht_selfheal_dir_mkdir_lookup_cbk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf,
+ dict_t *xattr, struct iatt *postparent)
+{
+ dht_local_t *local = NULL;
+ int i = 0;
+ int this_call_cnt = 0;
+ int missing_dirs = 0;
+ dht_layout_t *layout = NULL;
+ xlator_t *prev = 0;
+ loc_t *loc = NULL;
+ char gfid_local[GF_UUID_BUF_SIZE] = {0};
+ int index = -1;
+
+ VALIDATE_OR_GOTO(this->private, err);
+
+ local = frame->local;
+ layout = local->layout;
+ loc = &local->loc;
+ prev = cookie;
+
+ if (!gf_uuid_is_null(local->gfid))
+ gf_uuid_unparse(local->gfid, gfid_local);
+
+ LOCK(&frame->lock);
+ {
+ index = dht_layout_index_for_subvol(layout, prev);
+ if ((op_ret < 0) && (op_errno == ENOENT || op_errno == ESTALE)) {
+ local->selfheal.hole_cnt = !local->selfheal.hole_cnt
+ ? 1
+ : local->selfheal.hole_cnt + 1;
+ /* the status might have changed. Update the layout with the
+ * new status
+ */
+ if (index >= 0) {
+ layout->list[index].err = op_errno;
+ }
+ }
+
+ if (!op_ret) {
+ dht_iatt_merge(this, &local->stbuf, stbuf);
+ if (prev == local->mds_subvol) {
+ dict_unref(local->xattr);
+ local->xattr = dict_ref(xattr);
+ }
+ /* the status might have changed. Update the layout with the
+ * new status
+ */
+ if (index >= 0) {
+ layout->list[index].err = -1;
+ }
+ }
+ }
+ UNLOCK(&frame->lock);
+
+ this_call_cnt = dht_frame_return(frame);
+
+ if (is_last_call(this_call_cnt)) {
+ if (local->selfheal.hole_cnt == layout->cnt) {
+ gf_msg_debug(this->name, op_errno,
+ "Lookup failed, an rmdir could have "
+ "deleted this entry %s",
+ loc->name);
+ local->op_errno = op_errno;
+ goto err;
+ } else {
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].err == ENOENT ||
+ layout->list[i].err == ESTALE ||
+ local->selfheal.force_mkdir)
+ missing_dirs++;
+ }
+
+ if (missing_dirs == 0) {
+ dht_selfheal_dir_finish(frame, this, 0, 0);
+ dht_selfheal_dir_setattr(frame, loc, &local->stbuf, 0xffffffff,
+ layout);
+ return 0;
+ }
- dht_log_new_layout_for_dir_selfheal (this, loc, layout);
+ local->call_cnt = missing_dirs;
+ dht_selfheal_dir_mkdir_lookup_done(frame, this);
+ }
+ }
- for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].err != -1 || !layout->list[i].stop)
- continue;
+ return 0;
- dht_selfheal_dir_xattr_persubvol (frame, loc, layout, i, NULL);
+err:
+ dht_selfheal_dir_finish(frame, this, -1, 1);
+ return 0;
+}
- if (--missing_xattr == 0)
- break;
+static int
+dht_selfheal_dir_mkdir_lock_cbk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int i = 0;
+ int ret = -1;
+ xlator_t *mds_subvol = NULL;
+
+ VALIDATE_OR_GOTO(this->private, err);
+
+ conf = this->private;
+ local = frame->local;
+ mds_subvol = local->mds_subvol;
+
+ local->call_cnt = conf->subvolume_cnt;
+
+ if (op_ret < 0) {
+ if (op_errno == EINVAL) {
+ local->call_cnt = 1;
+ dht_selfheal_dir_mkdir_lookup_done(frame, this);
+ return 0;
+ }
+
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_ENTRYLK_ERROR,
+ "path=%s", local->loc.path, NULL);
+
+ local->op_errno = op_errno;
+ goto err;
+ }
+
+ /* After getting locks, perform lookup again to ensure that the
+ directory was not deleted by a racing rmdir
+ */
+ if (!local->xattr_req)
+ local->xattr_req = dict_new();
+
+ ret = dict_set_int32(local->xattr_req, "list-xattr", 1);
+ if (ret)
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, "path=%s",
+ local->loc.path, NULL);
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (mds_subvol && conf->subvolumes[i] == mds_subvol) {
+ STACK_WIND_COOKIE(frame, dht_selfheal_dir_mkdir_lookup_cbk,
+ conf->subvolumes[i], conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup, &local->loc,
+ local->xattr_req);
+ } else {
+ STACK_WIND_COOKIE(frame, dht_selfheal_dir_mkdir_lookup_cbk,
+ conf->subvolumes[i], conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup, &local->loc,
+ NULL);
}
+ }
- dummy = dht_layout_new (this, 1);
- if (!dummy)
- goto out;
-
- for (i = 0; i < conf->subvolume_cnt && missing_xattr; i++) {
- if (dht_is_subvol_part_of_layout (layout, conf->subvolumes[i])
- == _gf_false) {
- dht_selfheal_dir_xattr_persubvol (frame, loc, dummy, 0,
- conf->subvolumes[i]);
- missing_xattr--;
- continue;
- }
-
- j = dht_layout_index_from_conf (layout, conf->subvolumes[i]);
-
- if ((j != -1) && (layout->list[j].err != -1) &&
- (layout->list[j].err != ENOENT) &&
- (layout->list[j].err != 0)) {
- dht_selfheal_dir_xattr_persubvol (frame, loc, dummy, 0,
- conf->subvolumes[i]);
- missing_xattr--;
- }
- }
-
- dht_layout_unref (this, dummy);
-out:
- return 0;
+ return 0;
+err:
+ dht_selfheal_dir_finish(frame, this, -1, 1);
+ return 0;
}
-int
-dht_selfheal_dir_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *statpre,
- struct iatt *statpost, dict_t *xdata)
+static int
+dht_selfheal_dir_mkdir(call_frame_t *frame, loc_t *loc, dht_layout_t *layout,
+ int force)
{
- dht_local_t *local = NULL;
- dht_layout_t *layout = NULL;
- int this_call_cnt = 0, ret = -1;
-
- local = frame->local;
- layout = local->selfheal.layout;
-
- this_call_cnt = dht_frame_return (frame);
+ int missing_dirs = 0;
+ int i = 0;
+ int op_errno = 0;
+ int ret = -1;
+ dht_local_t *local = NULL;
+ xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+
+ local = frame->local;
+ this = frame->this;
+ conf = this->private;
+
+ local->selfheal.force_mkdir = force;
+ local->selfheal.hole_cnt = 0;
+
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].err == ENOENT || force)
+ missing_dirs++;
+ }
+
+ if (missing_dirs == 0) {
+ /* We don't need to create any directories. Proceed to heal the
+ * attrs and xattrs
+ */
+ if (!__is_root_gfid(local->stbuf.ia_gfid)) {
+ if (local->need_xattr_heal) {
+ local->need_xattr_heal = 0;
+ ret = dht_dir_xattr_heal(this, local, &op_errno);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_DIR_XATTR_HEAL_FAILED, "path=%s",
+ local->loc.path, "gfid=%s", local->gfid, NULL);
+ }
+ } else {
+ if (!gf_uuid_is_null(local->gfid))
+ gf_uuid_copy(loc->gfid, local->gfid);
- if (is_last_call (this_call_cnt)) {
- ret = dht_selfheal_layout_lock (frame, layout, _gf_false,
- dht_selfheal_dir_xattr,
- dht_should_heal_layout);
+ ret = dht_common_mark_mdsxattr(frame, NULL, 0);
+ if (!ret)
+ return 0;
- if (ret < 0) {
- dht_selfheal_dir_finish (frame, this, -1);
- }
+ gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_SET_XATTR_FAILED,
+ "path=%s", local->loc.path, "gfid=%s", local->gfid,
+ NULL);
+ }
}
-
+ dht_selfheal_dir_setattr(frame, loc, &local->stbuf, 0xffffffff, layout);
return 0;
+ }
+
+ /* MDS xattr is populated only while DHT is having more than one
+ subvol.In case of graph switch while adding more dht subvols need to
+ consider hash subvol as a MDS to avoid MDS check failure at the time
+ of running fop on directory
+ */
+ if (!dict_get(local->xattr, conf->mds_xattr_key) &&
+ (conf->subvolume_cnt > 1)) {
+ if (local->hashed_subvol == NULL) {
+ local->hashed_subvol = dht_subvol_get_hashed(this, loc);
+ if (local->hashed_subvol == NULL) {
+ local->op_errno = EINVAL;
+ gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_HASHED_SUBVOL_GET_FAILED, "gfid=%s",
+ loc->pargfid, "name=%s", loc->name, "path=%s",
+ loc->path, NULL);
+ goto err;
+ }
+ }
+ ret = dht_inode_ctx_mdsvol_set(local->inode, this,
+ local->hashed_subvol);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SET_INODE_CTX_FAILED,
+ "Failed to set hashed subvol for %s on inode vol is %s",
+ local->loc.path,
+ local->hashed_subvol ? local->hashed_subvol->name : "NULL");
+ goto err;
+ }
+ }
+
+ if (local->hashed_subvol == NULL) {
+ local->hashed_subvol = dht_subvol_get_hashed(this, loc);
+ if (local->hashed_subvol == NULL) {
+ local->op_errno = EINVAL;
+ gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_HASHED_SUBVOL_GET_FAILED, "gfid=%s", loc->pargfid,
+ "name=%s", loc->name, "path=%s", loc->path, NULL);
+ goto err;
+ }
+ }
+
+ local->current = &local->lock[0];
+ ret = dht_protect_namespace(frame, loc, local->hashed_subvol,
+ &local->current->ns,
+ dht_selfheal_dir_mkdir_lock_cbk);
+
+ if (ret < 0)
+ goto err;
+
+ return 0;
+err:
+ return -1;
}
-
-int
-dht_selfheal_dir_setattr (call_frame_t *frame, loc_t *loc, struct iatt *stbuf,
- int32_t valid, dht_layout_t *layout)
+static int
+dht_selfheal_layout_alloc_start(xlator_t *this, loc_t *loc,
+ dht_layout_t *layout)
{
- int missing_attr = 0;
- int i = 0, ret = -1;
- dht_local_t *local = NULL;
- xlator_t *this = NULL;
-
- local = frame->local;
- this = frame->this;
+ int start = 0;
+ uint32_t hashval = 0;
+ int ret = 0;
+ const char *str = NULL;
+ dht_conf_t *conf = NULL;
+ char buf[UUID_CANONICAL_FORM_LEN + 1] = {
+ 0,
+ };
+
+ conf = this->private;
+
+ if (conf->randomize_by_gfid) {
+ str = uuid_utoa_r(loc->gfid, buf);
+ } else {
+ str = loc->path;
+ }
+
+ ret = dht_hash_compute(this, layout->type, str, &hashval);
+ if (ret == 0) {
+ start = (hashval % layout->cnt);
+ }
+
+ return start;
+}
+static int
+dht_get_layout_count(xlator_t *this, dht_layout_t *layout, int new_layout)
+{
+ int i = 0;
+ int j = 0;
+ int err = 0;
+ int count = 0;
+ dht_conf_t *conf = NULL;
+
+ /* Gets in use only for replace-brick, remove-brick */
+ conf = this->private;
+ for (i = 0; i < layout->cnt; i++) {
+ for (j = 0; j < conf->subvolume_cnt; j++) {
+ if (conf->decommissioned_bricks[j] &&
+ conf->decommissioned_bricks[j] == layout->list[i].xlator) {
+ layout->list[i].err = EINVAL;
+ break;
+ }
+ }
+ }
+
+ for (i = 0; i < layout->cnt; i++) {
+ err = layout->list[i].err;
+ if (err == -1 || err == 0 || err == ENOENT) {
+ /* Take this with a pinch of salt. The behaviour seems
+ * to be slightly different when this function is
+ * invoked from mkdir codepath. For eg., err == 0 in
+ * mkdir codepath means directory created but xattr
+ * is not set yet.
+ */
+
+ /* Setting list[i].err = -1 is an indication for
+ dht_selfheal_layout_new_directory() to assign
+ a range. We set it to -1 based on any one of
+ the three criteria:
+
+ - err == -1 already, which means directory
+ existed but layout was not set on it.
+
+ - err == 0, which means directory exists and
+ has an old layout piece which will be
+ overwritten now.
+
+ - err == ENOENT, which means directory does
+ not exist (possibly racing with mkdir or
+ finishing half done mkdir). The missing
+ directory will be attempted to be recreated.
+ */
+ count++;
+ if (!err)
+ layout->list[i].err = -1;
+ }
+ }
+
+ /* no subvolume has enough space, but can't stop directory creation */
+ if (!count || !new_layout) {
for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].err == -1)
- missing_attr++;
- }
-
- if (missing_attr == 0) {
- ret = dht_selfheal_layout_lock (frame, layout, _gf_false,
- dht_selfheal_dir_xattr,
- dht_should_heal_layout);
-
- if (ret < 0) {
- dht_selfheal_dir_finish (frame, this, -1);
- }
-
- return 0;
- }
+ err = layout->list[i].err;
+ if (err == ENOSPC) {
+ layout->list[i].err = -1;
+ count++;
+ }
+ }
+ }
+
+ /* if layout->spread_cnt is set, check if it is <= available
+ * subvolumes (down brick and decommissioned bricks are considered
+ * un-available). Else return count (available up bricks) */
+ count = ((layout->spread_cnt && (layout->spread_cnt <= count))
+ ? layout->spread_cnt
+ : ((count) ? count : 1));
+
+ return count;
+}
- if (!gf_uuid_is_null (local->gfid))
- gf_uuid_copy (loc->gfid, local->gfid);
+void
+dht_selfheal_layout_new_directory(call_frame_t *frame, loc_t *loc,
+ dht_layout_t *new_layout);
- local->call_cnt = missing_attr;
- for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].err == -1) {
- gf_msg_trace (this->name, 0,
- "%s: setattr on subvol %s, gfid = %s",
- loc->path, layout->list[i].xlator->name,
- uuid_utoa(loc->gfid));
-
- STACK_WIND (frame, dht_selfheal_dir_setattr_cbk,
- layout->list[i].xlator,
- layout->list[i].xlator->fops->setattr,
- loc, stbuf, valid, NULL);
- }
- }
+void
+dht_layout_range_swap(dht_layout_t *layout, int i, int j);
- return 0;
-}
+/*
+ * It's a bit icky using local variables in a macro, but it makes the rest
+ * of the code a lot clearer.
+ */
+#define OV_ENTRY(x, y) table[x * new->cnt + y]
-int
-dht_selfheal_dir_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
+static void
+dht_selfheal_layout_maximize_overlap(call_frame_t *frame, loc_t *loc,
+ dht_layout_t *new, dht_layout_t *old)
{
- dht_local_t *local = NULL;
- dht_layout_t *layout = NULL;
- call_frame_t *prev = NULL;
- xlator_t *subvol = NULL;
- int i = 0;
- int this_call_cnt = 0;
- char gfid[GF_UUID_BUF_SIZE] = {0};
-
- local = frame->local;
- layout = local->selfheal.layout;
- prev = cookie;
- subvol = prev->this;
-
- if ((op_ret == 0) || ((op_ret == -1) && (op_errno == EEXIST))) {
- for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].xlator == subvol) {
- layout->list[i].err = -1;
- break;
- }
+ int i = 0;
+ int j = 0;
+ uint32_t curr_overlap = 0;
+ uint32_t max_overlap = 0;
+ int max_overlap_idx = -1;
+ uint32_t overlap = 0;
+ uint32_t *table = NULL;
+
+ dht_layout_sort_volname(old);
+ /* Now both old_layout->list[] and new_layout->list[]
+ are match the same xlators/subvolumes. i.e,
+ old_layout->[i] and new_layout->[i] are referring
+ to the same subvolumes
+ */
+
+ /* Build a table of overlaps between new[i] and old[j]. */
+ table = alloca(sizeof(overlap) * old->cnt * new->cnt);
+ if (!table) {
+ return;
+ }
+ memset(table, 0, sizeof(overlap) * old->cnt * new->cnt);
+ for (i = 0; i < new->cnt; ++i) {
+ for (j = 0; j < old->cnt; ++j) {
+ OV_ENTRY(i, j) = dht_overlap_calc(old, j, new, i);
+ }
+ }
+
+ for (i = 0; i < new->cnt; i++) {
+ if (new->list[i].err > 0) {
+ /* Subvol might be marked for decommission
+ with EINVAL, or some other serious error
+ marked with positive errno.
+ */
+ continue;
+ }
+
+ max_overlap = 0;
+ max_overlap_idx = i;
+ for (j = (i + 1); j < new->cnt; ++j) {
+ if (new->list[j].err > 0) {
+ /* Subvol might be marked for decommission
+ with EINVAL, or some other serious error
+ marked with positive errno.
+ */
+ continue;
+ }
+ /* Calculate the overlap now. */
+ curr_overlap = OV_ENTRY(i, i) + OV_ENTRY(j, j);
+ /* Calculate the overlap after the proposed swap. */
+ overlap = OV_ENTRY(i, j) + OV_ENTRY(j, i);
+ /* Are we better than status quo? */
+ if (overlap > curr_overlap) {
+ overlap -= curr_overlap;
+ /* Are we better than the previous choice? */
+ if (overlap > max_overlap) {
+ max_overlap = overlap;
+ max_overlap_idx = j;
}
+ }
}
- if (op_ret) {
-
- gf_uuid_unparse(local->loc.gfid, gfid);
- gf_msg (this->name, ((op_errno == EEXIST) ? GF_LOG_DEBUG :
- GF_LOG_WARNING),
- op_errno, DHT_MSG_DIR_SELFHEAL_FAILED,
- "Directory selfheal failed: path = %s, gfid = %s",
- local->loc.path, gfid );
- goto out;
- }
-
- dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
- dht_iatt_merge (this, &local->preparent, preparent, prev->this);
- dht_iatt_merge (this, &local->postparent, postparent, prev->this);
-
-out:
- this_call_cnt = dht_frame_return (frame);
-
- if (is_last_call (this_call_cnt)) {
- dht_selfheal_dir_setattr (frame, &local->loc, &local->stbuf, 0xffffff, layout);
+ if (max_overlap_idx != i) {
+ dht_layout_range_swap(new, i, max_overlap_idx);
+ /* Need to swap the table values too. */
+ for (j = 0; j < old->cnt; ++j) {
+ overlap = OV_ENTRY(i, j);
+ OV_ENTRY(i, j) = OV_ENTRY(max_overlap_idx, j);
+ OV_ENTRY(max_overlap_idx, j) = overlap;
+ }
}
-
- return 0;
+ }
}
-void
-dht_selfheal_dir_mkdir_setacl (dict_t *xattr, dict_t *dict)
+static dht_layout_t *
+dht_fix_layout_of_directory(call_frame_t *frame, loc_t *loc,
+ dht_layout_t *layout)
{
- data_t *acl_default = NULL;
- data_t *acl_access = NULL;
- xlator_t *this = NULL;
- int ret = -1;
-
- GF_ASSERT (xattr);
- GF_ASSERT (dict);
+ int i = 0;
+ xlator_t *this = NULL;
+ dht_layout_t *new_layout = NULL;
+ dht_conf_t *priv = NULL;
+ dht_local_t *local = NULL;
+ uint32_t subvol_down = 0;
+ gf_boolean_t maximize_overlap = _gf_true;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ this = frame->this;
+ priv = this->private;
+ local = frame->local;
+
+ if (layout->type == DHT_HASH_TYPE_DM_USER) {
+ gf_msg_debug(THIS->name, 0, "leaving %s alone", loc->path);
+ goto done;
+ }
+
+ new_layout = dht_layout_new(this, priv->subvolume_cnt);
+ if (!new_layout) {
+ gf_uuid_unparse(loc->gfid, gfid);
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_MEM_ALLOC_FAILED,
+ "new_layout, path=%s", loc->path, "gfid=%s", gfid, NULL);
+ goto done;
+ }
- this = THIS;
- GF_ASSERT (this);
+ /* If a subvolume is down, do not re-write the layout. */
+ dht_layout_anomalies(this, loc, layout, NULL, NULL, NULL, &subvol_down,
+ NULL, NULL);
- acl_default = dict_get (xattr, POSIX_ACL_DEFAULT_XATTR);
+ if (subvol_down) {
+ gf_uuid_unparse(loc->gfid, gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_FIX_FAILED,
+ "subvol-down=%u", subvol_down, "Skipping-fix-layout", "path=%s",
+ loc->path, "gfid=%s", gfid, NULL);
+ GF_FREE(new_layout);
+ return NULL;
+ }
+
+ for (i = 0; i < new_layout->cnt; i++) {
+ if (layout->list[i].err != ENOSPC)
+ new_layout->list[i].err = layout->list[i].err;
+ else
+ new_layout->list[i].err = -1;
+
+ new_layout->list[i].xlator = layout->list[i].xlator;
+ }
+
+ new_layout->commit_hash = layout->commit_hash;
+
+ if (priv->du_stats) {
+ for (i = 0; i < priv->subvolume_cnt; ++i) {
+ gf_smsg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_SUBVOL_INFO,
+ "index=%d", i, "name=%s", priv->subvolumes[i]->name,
+ "chunks=%u", priv->du_stats[i].chunks, "path=%s", loc->path,
+ NULL);
+
+ /* Maximize overlap if the bricks are all the same
+ * size.
+ * This is probably not going to be very common on
+ * live setups but will benefit our regression tests
+ */
+ if (i && (priv->du_stats[i].chunks != priv->du_stats[0].chunks)) {
+ maximize_overlap = _gf_false;
+ }
+ }
+ } else {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_NO_DISK_USAGE_STATUS,
+ NULL);
+ }
+
+ /* First give it a layout as though it is a new directory. This
+ ensures rotation to kick in */
+ dht_layout_sort_volname(new_layout);
+ dht_selfheal_layout_new_directory(frame, loc, new_layout);
+
+ /* Maximize overlap if weighted-rebalance is disabled */
+ if (!priv->do_weighting)
+ maximize_overlap = _gf_true;
+
+ /* Now selectively re-assign ranges only when it helps */
+ if (maximize_overlap) {
+ dht_selfheal_layout_maximize_overlap(frame, loc, new_layout, layout);
+ }
+done:
+ if (new_layout) {
+ /* Make sure the extra 'ref' for existing layout is removed */
+ dht_layout_unref(this, local->layout);
- if (!acl_default) {
- gf_msg_debug (this->name, 0,
- "ACL_DEFAULT xattr not present");
- goto cont;
- }
- ret = dict_set (dict, POSIX_ACL_DEFAULT_XATTR, acl_default);
- if (ret)
- gf_msg (this->name, GF_LOG_WARNING, 0,
- DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value.key = %s",
- POSIX_ACL_DEFAULT_XATTR);
-cont:
- acl_access = dict_get (xattr, POSIX_ACL_ACCESS_XATTR);
- if (!acl_access) {
- gf_msg_debug (this->name, 0,
- "ACL_ACCESS xattr not present");
- goto out;
- }
- ret = dict_set (dict, POSIX_ACL_ACCESS_XATTR, acl_access);
- if (ret)
- gf_msg (this->name, GF_LOG_WARNING, 0,
- DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value.key = %s",
- POSIX_ACL_ACCESS_XATTR);
+ local->layout = new_layout;
+ }
-out:
- return;
+ return local->layout;
}
-int
-dht_selfheal_dir_mkdir (call_frame_t *frame, loc_t *loc,
- dht_layout_t *layout, int force)
+/*
+ * Having to call this 2x for each entry in the layout is pretty horrible, but
+ * that's what all of this layout-sorting nonsense gets us.
+ */
+static uint32_t
+dht_get_chunks_from_xl(xlator_t *parent, xlator_t *child)
{
- int missing_dirs = 0;
- int i = 0;
- int ret = -1;
- dht_local_t *local = NULL;
- xlator_t *this = NULL;
- dict_t *dict = NULL;
-
- local = frame->local;
- this = frame->this;
-
- local->selfheal.force_mkdir = force ? _gf_true : _gf_false;
-
- for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].err == ENOENT || force)
- missing_dirs++;
- }
+ dht_conf_t *priv = parent->private;
+ xlator_list_t *trav;
+ uint32_t index = 0;
- if (missing_dirs == 0) {
- dht_selfheal_dir_setattr (frame, loc, &local->stbuf, 0xffffffff, layout);
- return 0;
- }
+ if (!priv->du_stats) {
+ return 0;
+ }
- local->call_cnt = missing_dirs;
- if (!gf_uuid_is_null (local->gfid)) {
- dict = dict_new ();
- if (!dict)
- return -1;
-
- ret = dict_set_static_bin (dict, "gfid-req", local->gfid, 16);
- if (ret)
- gf_msg (this->name, GF_LOG_WARNING, 0,
- DHT_MSG_DICT_SET_FAILED,
- "%s: Failed to set dictionary value:"
- " key = gfid-req", loc->path);
- } else if (local->params) {
- /* Send the dictionary from higher layers directly */
- dict = dict_ref (local->params);
+ for (trav = parent->children; trav; trav = trav->next) {
+ if (trav->xlator == child) {
+ return priv->du_stats[index].chunks;
}
- /* Set acls */
- if (local->xattr && dict)
- dht_selfheal_dir_mkdir_setacl (local->xattr, dict);
+ ++index;
+ }
- if (!dict)
- gf_log (this->name, GF_LOG_WARNING,
- "dict is NULL, need to make sure gfids are same");
-
- for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].err == ENOENT || force) {
- gf_msg_debug (this->name, 0,
- "Creating directory %s on subvol %s",
- loc->path, layout->list[i].xlator->name);
-
- STACK_WIND (frame, dht_selfheal_dir_mkdir_cbk,
- layout->list[i].xlator,
- layout->list[i].xlator->fops->mkdir,
- loc,
- st_mode_from_ia (local->stbuf.ia_prot,
- local->stbuf.ia_type),
- 0, dict);
- }
- }
-
- if (dict)
- dict_unref (dict);
-
- return 0;
+ return 0;
}
-int
-dht_selfheal_layout_alloc_start (xlator_t *this, loc_t *loc,
- dht_layout_t *layout)
+void
+dht_selfheal_layout_new_directory(call_frame_t *frame, loc_t *loc,
+ dht_layout_t *layout)
{
- int start = 0;
- uint32_t hashval = 0;
- int ret = 0;
- const char *str = NULL;
- dht_conf_t *conf = NULL;
- char buf[UUID_CANONICAL_FORM_LEN + 1] = {0, };
-
- conf = this->private;
-
- if (conf->randomize_by_gfid) {
- str = uuid_utoa_r (loc->gfid, buf);
+ xlator_t *this = NULL;
+ double chunk = 0;
+ int i = 0;
+ uint32_t start = 0;
+ int bricks_to_use = 0;
+ int err = 0;
+ int start_subvol = 0;
+ uint32_t curr_size;
+ uint32_t range_size;
+ uint64_t total_size = 0;
+ int real_i;
+ dht_conf_t *priv;
+ gf_boolean_t weight_by_size;
+ int bricks_used = 0;
+
+ this = frame->this;
+ priv = this->private;
+ weight_by_size = priv->do_weighting;
+
+ bricks_to_use = dht_get_layout_count(this, layout, 1);
+ GF_ASSERT(bricks_to_use > 0);
+
+ bricks_used = 0;
+ for (i = 0; i < layout->cnt; ++i) {
+ err = layout->list[i].err;
+ if ((err != -1) && (err != ENOENT)) {
+ continue;
+ }
+ curr_size = dht_get_chunks_from_xl(this, layout->list[i].xlator);
+ if (!curr_size) {
+ weight_by_size = _gf_false;
+ break;
+ }
+ total_size += curr_size;
+ if (++bricks_used >= bricks_to_use) {
+ break;
+ }
+ }
+
+ if (weight_by_size && total_size) {
+ /* We know total_size is not zero. */
+ chunk = ((double)0xffffffff) / ((double)total_size);
+ gf_msg_debug(this->name, 0,
+ "chunk size = 0xffffffff / %" PRIu64 " = %f", total_size,
+ chunk);
+ } else {
+ weight_by_size = _gf_false;
+ chunk = ((unsigned long)0xffffffff) / bricks_to_use;
+ }
+
+ start_subvol = dht_selfheal_layout_alloc_start(this, loc, layout);
+
+ /* clear out the range, as we are re-computing here */
+ DHT_RESET_LAYOUT_RANGE(layout);
+
+ /*
+ * OK, what's this "real_i" stuff about? This used to be two loops -
+ * from start_subvol to layout->cnt-1, then from 0 to start_subvol-1.
+ * That way is practically an open invitation to bugs when only one
+ * of the loops is updated. Using real_i and modulo operators to make
+ * it one loop avoids this problem. Remember, folks: it's everyone's
+ * responsibility to help stamp out copy/paste abuse.
+ */
+ bricks_used = 0;
+ for (real_i = 0; real_i < layout->cnt; real_i++) {
+ i = (real_i + start_subvol) % layout->cnt;
+ err = layout->list[i].err;
+ if ((err != -1) && (err != ENOENT)) {
+ continue;
+ }
+ if (weight_by_size) {
+ curr_size = dht_get_chunks_from_xl(this, layout->list[i].xlator);
+ if (!curr_size) {
+ continue;
+ }
} else {
- str = loc->path;
+ curr_size = 1;
}
-
- ret = dht_hash_compute (this, layout->type, str, &hashval);
- if (ret == 0) {
- start = (hashval % layout->cnt);
+ range_size = chunk * curr_size;
+ gf_msg_debug(this->name, 0, "assigning range size 0x%x to %s",
+ range_size, layout->list[i].xlator->name);
+ DHT_SET_LAYOUT_RANGE(layout, i, start, range_size, loc->path);
+ if (++bricks_used >= bricks_to_use) {
+ layout->list[i].stop = 0xffffffff;
+ goto done;
}
+ start += range_size;
+ }
- return start;
+done:
+ return;
}
-static inline int
-dht_get_layout_count (xlator_t *this, dht_layout_t *layout, int new_layout)
+static int
+dht_selfheal_dir_getafix(call_frame_t *frame, loc_t *loc, dht_layout_t *layout)
{
- int i = 0;
- int j = 0;
- int err = 0;
- int count = 0;
- dht_conf_t *conf = NULL;
-
- /* Gets in use only for replace-brick, remove-brick */
- conf = this->private;
- for (i = 0; i < layout->cnt; i++) {
- for (j = 0; j < conf->subvolume_cnt; j++) {
- if (conf->decommissioned_bricks[j] &&
- conf->decommissioned_bricks[j] == layout->list[i].xlator) {
- layout->list[i].err = EINVAL;
- break;
- }
- }
- }
-
- for (i = 0; i < layout->cnt; i++) {
- err = layout->list[i].err;
- if (err == -1 || err == 0 || err == ENOENT) {
- /* Take this with a pinch of salt. The behaviour seems
- * to be slightly different when this function is
- * invoked from mkdir codepath. For eg., err == 0 in
- * mkdir codepath means directory created but xattr
- * is not set yet.
- */
-
- /* Setting list[i].err = -1 is an indication for
- dht_selfheal_layout_new_directory() to assign
- a range. We set it to -1 based on any one of
- the three criteria:
-
- - err == -1 already, which means directory
- existed but layout was not set on it.
-
- - err == 0, which means directory exists and
- has an old layout piece which will be
- overwritten now.
-
- - err == ENOENT, which means directory does
- not exist (possibly racing with mkdir or
- finishing half done mkdir). The missing
- directory will be attempted to be recreated.
- */
- count++;
- if (!err)
- layout->list[i].err = -1;
- }
- }
+ dht_local_t *local = NULL;
+ uint32_t holes = 0;
+ int ret = -1;
+ int i = -1;
+ uint32_t overlaps = 0;
+
+ local = frame->local;
+
+ holes = local->selfheal.hole_cnt;
+ overlaps = local->selfheal.overlaps_cnt;
+
+ if (holes || overlaps) {
+ /* If the layout has anomalies which would change the hash
+ * ranges, then we need to reset the commit_hash for this
+ * directory, as the layout would change and things may not
+ * be in place as expected */
+ layout->commit_hash = DHT_LAYOUT_HASH_INVALID;
+ dht_selfheal_layout_new_directory(frame, loc, layout);
+ ret = 0;
+ }
+
+ for (i = 0; i < layout->cnt; i++) {
+ /* directory not present */
+ if (layout->list[i].err == ENOENT) {
+ ret = 0;
+ break;
+ }
+ }
+
+ /* TODO: give a fix to these non-virgins */
+
+ return ret;
+}
- /* no subvolume has enough space, but can't stop directory creation */
- if (!count || !new_layout) {
- for (i = 0; i < layout->cnt; i++) {
- err = layout->list[i].err;
- if (err == ENOSPC) {
- layout->list[i].err = -1;
- count++;
- }
- }
- }
+int
+dht_selfheal_new_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
+ dht_layout_t *layout)
+{
+ dht_local_t *local = NULL;
+ int ret = 0;
+ inode_t *linked_inode = NULL, *inode = NULL;
+ loc_t *loc = NULL;
+ char pgfid[GF_UUID_BUF_SIZE] = {0};
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+ int32_t op_errno = EIO;
- /* if layout->spread_cnt is set, check if it is <= available
- * subvolumes (down brick and decommissioned bricks are considered
- * un-availbale). Else return count (available up bricks) */
- count = ((layout->spread_cnt &&
- (layout->spread_cnt <= count)) ?
- layout->spread_cnt : ((count) ? count : 1));
+ local = frame->local;
- return count;
-}
+ loc = &local->loc;
+ gf_uuid_unparse(local->stbuf.ia_gfid, gfid);
+ gf_uuid_unparse(loc->parent->gfid, pgfid);
-void dht_selfheal_layout_new_directory (call_frame_t *frame, loc_t *loc,
- dht_layout_t *new_layout);
+ linked_inode = inode_link(loc->inode, loc->parent, loc->name,
+ &local->stbuf);
+ if (!linked_inode) {
+ gf_smsg(frame->this->name, GF_LOG_WARNING, 0, DHT_MSG_LINK_INODE_FAILED,
+ "pgfid=%s", pgfid, "name=%s", loc->name, "gfid=%s", gfid, NULL);
+ ret = -1;
+ goto out;
+ }
-void dht_layout_entry_swap (dht_layout_t *layout, int i, int j);
-void dht_layout_range_swap (dht_layout_t *layout, int i, int j);
+ inode = loc->inode;
+ loc->inode = linked_inode;
+ inode_unref(inode);
-/*
- * It's a bit icky using local variables in a macro, but it makes the rest
- * of the code a lot clearer.
- */
-#define OV_ENTRY(x,y) table[x*new->cnt+y]
+ local->selfheal.dir_cbk = dir_cbk;
+ local->selfheal.layout = dht_layout_ref(frame->this, layout);
-void
-dht_selfheal_layout_maximize_overlap (call_frame_t *frame, loc_t *loc,
- dht_layout_t *new, dht_layout_t *old)
-{
- int i = 0;
- int j = 0;
- uint32_t curr_overlap = 0;
- uint32_t max_overlap = 0;
- int max_overlap_idx = -1;
- uint32_t overlap = 0;
- uint32_t *table = NULL;
-
- dht_layout_sort_volname (old);
- /* Now both old_layout->list[] and new_layout->list[]
- are match the same xlators/subvolumes. i.e,
- old_layout->[i] and new_layout->[i] are referring
- to the same subvolumes
- */
-
- /* Build a table of overlaps between new[i] and old[j]. */
- table = alloca(sizeof(overlap)*old->cnt*new->cnt);
- if (!table) {
- return;
- }
- memset(table,0,sizeof(overlap)*old->cnt*new->cnt);
- for (i = 0; i < new->cnt; ++i) {
- for (j = 0; j < old->cnt; ++j) {
- OV_ENTRY(i,j) = dht_overlap_calc(old,j,new,i);
- }
- }
+ dht_layout_sort_volname(layout);
+ dht_selfheal_layout_new_directory(frame, &local->loc, layout);
- for (i = 0; i < new->cnt; i++) {
- if (new->list[i].err > 0) {
- /* Subvol might be marked for decommission
- with EINVAL, or some other serious error
- marked with positive errno.
- */
- continue;
- }
+ op_errno = ENOMEM;
+ ret = dht_selfheal_layout_lock(frame, layout, _gf_true,
+ dht_selfheal_dir_xattr,
+ dht_should_heal_layout);
- max_overlap = 0;
- max_overlap_idx = i;
- for (j = (i + 1); j < new->cnt; ++j) {
- if (new->list[j].err > 0) {
- /* Subvol might be marked for decommission
- with EINVAL, or some other serious error
- marked with positive errno.
- */
- continue;
- }
- /* Calculate the overlap now. */
- curr_overlap = OV_ENTRY(i,i) + OV_ENTRY(j,j);
- /* Calculate the overlap after the proposed swap. */
- overlap = OV_ENTRY(i,j) + OV_ENTRY(j,i);
- /* Are we better than status quo? */
- if (overlap > curr_overlap) {
- overlap -= curr_overlap;
- /* Are we better than the previous choice? */
- if (overlap > max_overlap) {
- max_overlap = overlap;
- max_overlap_idx = j;
- }
- }
- }
+out:
+ if (ret < 0) {
+ dir_cbk(frame, NULL, frame->this, -1, op_errno, NULL);
+ }
- if (max_overlap_idx != i) {
- dht_layout_range_swap (new, i, max_overlap_idx);
- /* Need to swap the table values too. */
- for (j = 0; j < old->cnt; ++j) {
- overlap = OV_ENTRY(i,j);
- OV_ENTRY(i,j) = OV_ENTRY(max_overlap_idx,j);
- OV_ENTRY(max_overlap_idx,j) = overlap;
- }
- }
- }
+ return 0;
}
-
-dht_layout_t *
-dht_fix_layout_of_directory (call_frame_t *frame, loc_t *loc,
- dht_layout_t *layout)
+int
+dht_fix_directory_layout(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
+ dht_layout_t *layout)
{
- int i = 0;
- xlator_t *this = NULL;
- dht_layout_t *new_layout = NULL;
- dht_conf_t *priv = NULL;
- dht_local_t *local = NULL;
- uint32_t subvol_down = 0;
- int ret = 0;
-
- this = frame->this;
- priv = this->private;
- local = frame->local;
-
- if (layout->type == DHT_HASH_TYPE_DM_USER) {
- gf_msg_debug (THIS->name, 0, "leaving %s alone",
- loc->path);
- goto done;
- }
+ dht_local_t *local = NULL;
+ dht_layout_t *tmp_layout = NULL;
+ int ret = 0;
- new_layout = dht_layout_new (this, priv->subvolume_cnt);
- if (!new_layout)
- goto done;
-
- /* If a subvolume is down, do not re-write the layout. */
- ret = dht_layout_anomalies (this, loc, layout, NULL, NULL, NULL,
- &subvol_down, NULL, NULL);
-
- if (subvol_down || (ret == -1)) {
- gf_msg (this->name, GF_LOG_WARNING, 0,
- DHT_MSG_LAYOUT_FIX_FAILED,
- "Layout fix failed: %u subvolume(s) are down"
- ". Skipping fix layout.", subvol_down);
- GF_FREE (new_layout);
- return NULL;
- }
+ local = frame->local;
- for (i = 0; i < new_layout->cnt; i++) {
- if (layout->list[i].err != ENOSPC)
- new_layout->list[i].err = layout->list[i].err;
- else
- new_layout->list[i].err = -1;
+ local->selfheal.dir_cbk = dir_cbk;
+ local->selfheal.layout = dht_layout_ref(frame->this, layout);
- new_layout->list[i].xlator = layout->list[i].xlator;
- }
+ /* No layout sorting required here */
+ tmp_layout = dht_fix_layout_of_directory(frame, &local->loc, layout);
+ if (!tmp_layout) {
+ return -1;
+ }
- if (priv->du_stats) {
- for (i = 0; i < priv->subvolume_cnt; ++i) {
- gf_log (this->name, GF_LOG_INFO,
- "subvolume %d (%s): %u chunks", i,
- priv->subvolumes[i]->name,
- priv->du_stats[i].chunks);
- }
- }
- else {
- gf_log (this->name, GF_LOG_WARNING, "no du stats ?!?");
- }
+ ret = dht_selfheal_layout_lock(frame, tmp_layout, _gf_false,
+ dht_fix_dir_xattr, dht_should_fix_layout);
- /* First give it a layout as though it is a new directory. This
- ensures rotation to kick in */
- dht_layout_sort_volname (new_layout);
- dht_selfheal_layout_new_directory (frame, loc, new_layout);
+ return ret;
+}
- /* Now selectively re-assign ranges only when it helps */
- dht_selfheal_layout_maximize_overlap (frame, loc, new_layout, layout);
+int
+dht_selfheal_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
+ loc_t *loc, dht_layout_t *layout)
+{
+ dht_local_t *local = NULL;
+ xlator_t *this = NULL;
+ uint32_t down = 0;
+ uint32_t misc = 0;
+ int ret = 0;
+ char pgfid[GF_UUID_BUF_SIZE] = {0};
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+ inode_t *linked_inode = NULL, *inode = NULL;
+
+ local = frame->local;
+ this = frame->this;
+
+ local->selfheal.dir_cbk = dir_cbk;
+ local->selfheal.layout = dht_layout_ref(this, layout);
+
+ if (local->need_attrheal) {
+ if (__is_root_gfid(local->stbuf.ia_gfid)) {
+ local->stbuf.ia_gid = local->prebuf.ia_gid;
+ local->stbuf.ia_uid = local->prebuf.ia_uid;
+
+ local->stbuf.ia_ctime = local->prebuf.ia_ctime;
+ local->stbuf.ia_ctime_nsec = local->prebuf.ia_ctime_nsec;
+ local->stbuf.ia_prot = local->prebuf.ia_prot;
+
+ } else if (!IA_ISINVAL(local->mds_stbuf.ia_type)) {
+ local->stbuf = local->mds_stbuf;
+ }
+ }
+
+ if (!__is_root_gfid(local->stbuf.ia_gfid)) {
+ gf_uuid_unparse(local->stbuf.ia_gfid, gfid);
+ gf_uuid_unparse(loc->parent->gfid, pgfid);
+
+ linked_inode = inode_link(loc->inode, loc->parent, loc->name,
+ &local->stbuf);
+ if (!linked_inode) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LINK_INODE_FAILED,
+ "pgfid=%s", pgfid, "name=%s", loc->name, "gfid=%s", gfid,
+ NULL);
+ ret = 0;
+ goto sorry_no_fix;
+ }
+
+ inode = loc->inode;
+ loc->inode = linked_inode;
+ inode_unref(inode);
+ }
+
+ if (local->need_xattr_heal && (local->mds_xattr)) {
+ dht_dir_set_heal_xattr(this, local, local->xattr, local->mds_xattr,
+ NULL, NULL);
+ dict_unref(local->mds_xattr);
+ local->mds_xattr = NULL;
+ }
+
+ dht_layout_anomalies(this, loc, layout, &local->selfheal.hole_cnt,
+ &local->selfheal.overlaps_cnt,
+ &local->selfheal.missing_cnt, &local->selfheal.down,
+ &local->selfheal.misc, NULL);
+
+ down = local->selfheal.down;
+ misc = local->selfheal.misc;
+
+ if (down) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_SELFHEAL_FAILED,
+ "path=%s", loc->path, "subvol-down=%d", down, "Not-fixing",
+ "gfid=%s", gfid, NULL);
+ ret = 0;
+ goto sorry_no_fix;
+ }
+
+ if (misc) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_SELFHEAL_FAILED,
+ "path=%s", loc->path, "misc=%d", misc, "unrecoverable-errors",
+ "gfid=%s", gfid, NULL);
+
+ ret = 0;
+ goto sorry_no_fix;
+ }
+
+ dht_layout_sort_volname(layout);
+ local->heal_layout = _gf_true;
+
+ /* Ignore return value as it can be inferred from result of
+ * dht_layout_anomalies
+ */
+ dht_selfheal_dir_getafix(frame, loc, layout);
+
+ if (!(local->selfheal.hole_cnt || local->selfheal.overlaps_cnt ||
+ local->selfheal.missing_cnt)) {
+ local->heal_layout = _gf_false;
+ }
+
+ ret = dht_selfheal_dir_mkdir(frame, loc, layout, 0);
+ if (ret < 0) {
+ ret = 0;
+ goto sorry_no_fix;
+ }
+
+ return 0;
-done:
- if (new_layout) {
- /* Now that the new layout has all the proper layout, change the
- inode context */
- dht_layout_set (this, loc->inode, new_layout);
+sorry_no_fix:
+ /* TODO: need to put appropriate local->op_errno */
+ dht_selfheal_dir_finish(frame, this, ret, 1);
- /* Make sure the extra 'ref' for existing layout is removed */
- dht_layout_unref (this, local->layout);
+ return 0;
+}
- local->layout = new_layout;
- }
+int
+dht_selfheal_restore(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
+ loc_t *loc, dht_layout_t *layout)
+{
+ int ret = 0;
+ dht_local_t *local = NULL;
- return local->layout;
-}
+ local = frame->local;
+ local->selfheal.dir_cbk = dir_cbk;
+ local->selfheal.layout = dht_layout_ref(frame->this, layout);
-/*
- * Having to call this 2x for each entry in the layout is pretty horrible, but
- * that's what all of this layout-sorting nonsense gets us.
- */
-uint32_t
-dht_get_chunks_from_xl (xlator_t *parent, xlator_t *child)
-{
- dht_conf_t *priv = parent->private;
- xlator_list_t *trav;
- uint32_t index = 0;
+ ret = dht_selfheal_dir_mkdir(frame, loc, layout, 1);
- if (!priv->du_stats) {
- return 0;
- }
+ return ret;
+}
- for (trav = parent->children; trav; trav = trav->next) {
- if (trav->xlator == child) {
- return priv->du_stats[index].chunks;
- }
- ++index;
- }
+int
+dht_dir_heal_xattrs(void *data)
+{
+ call_frame_t *frame = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ xlator_t *mds_subvol = NULL;
+ xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+ dict_t *user_xattr = NULL;
+ dict_t *internal_xattr = NULL;
+ dict_t *mds_xattr = NULL;
+ dict_t *xdata = NULL;
+ int call_cnt = 0;
+ int ret = -1;
+ int uret = 0;
+ int uflag = 0;
+ int i = 0;
+ int xattr_hashed = 0;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+ int32_t allzero[1] = {0};
+
+ GF_VALIDATE_OR_GOTO("dht", data, out);
+
+ frame = data;
+ local = frame->local;
+ this = frame->this;
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, local, out);
+ mds_subvol = local->mds_subvol;
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, conf, out);
+ gf_uuid_unparse(local->loc.gfid, gfid);
+
+ if (!mds_subvol) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_NO_MDS_SUBVOL, "path=%s",
+ local->loc.path, "gfid=%s", gfid, NULL);
+ goto out;
+ }
+
+ if ((local->loc.inode && gf_uuid_is_null(local->loc.inode->gfid)) ||
+ gf_uuid_is_null(local->loc.gfid)) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_GFID_NOT_PRESENT,
+ "skip-heal path=%s", local->loc.path, "gfid=%s", gfid, NULL);
+ goto out;
+ }
+
+ internal_xattr = dict_new();
+ if (!internal_xattr) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_CREATE_FAILED,
+ "dictionary", NULL);
+ goto out;
+ }
+ xdata = dict_new();
+ if (!xdata) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_CREATE_FAILED,
+ "dictionary", NULL);
+ goto out;
+ }
+
+ call_cnt = conf->subvolume_cnt;
+
+ user_xattr = dict_new();
+ if (!user_xattr) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_CREATE_FAILED,
+ "dictionary", NULL);
+ goto out;
+ }
+
+ ret = syncop_listxattr(local->mds_subvol, &local->loc, &mds_xattr, NULL,
+ NULL);
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LIST_XATTRS_FAILED,
+ "path=%s", local->loc.path, "name=%s", local->mds_subvol->name,
+ NULL);
+ }
+
+ if (!mds_xattr)
+ goto out;
+
+ dht_dir_set_heal_xattr(this, local, user_xattr, mds_xattr, &uret, &uflag);
+
+ /* To set quota related xattr need to set GLUSTERFS_INTERNAL_FOP_KEY
+ * key value to 1
+ */
+ if (dict_get(user_xattr, QUOTA_LIMIT_KEY) ||
+ dict_get(user_xattr, QUOTA_LIMIT_OBJECTS_KEY)) {
+ ret = dict_set_int32(xdata, GLUSTERFS_INTERNAL_FOP_KEY, 1);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "key=%s", GLUSTERFS_INTERNAL_FOP_KEY, "path=%s",
+ local->loc.path, NULL);
+ goto out;
+ }
+ }
+ if (uret <= 0 && !uflag)
+ goto out;
+
+ for (i = 0; i < call_cnt; i++) {
+ subvol = conf->subvolumes[i];
+ if (subvol == mds_subvol)
+ continue;
+ if (uret || uflag) {
+ /* Custom xattr heal is required - let posix handle it */
+ ret = dict_set_int8(xdata, "sync_backend_xattrs", _gf_true);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", local->loc.path, "key=%s",
+ "sync_backend_xattrs", NULL);
+ goto out;
+ }
+
+ ret = syncop_setxattr(subvol, &local->loc, user_xattr, 0, xdata,
+ NULL);
+ if (ret) {
+ xattr_hashed = 1;
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_DIR_XATTR_HEAL_FAILED,
+ "set-user-xattr-failed path=%s", local->loc.path,
+ "subvol=%s", subvol->name, "gfid=%s", gfid, NULL);
+ } else {
+ dict_del(xdata, "sync_backend_xattrs");
+ }
+ }
+ }
+ /* After heal all custom xattr reset internal MDS xattr to 0 */
+ if (!xattr_hashed) {
+ ret = dht_dict_set_array(internal_xattr, conf->mds_xattr_key, allzero,
+ 1);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_WARNING, ENOMEM, DHT_MSG_DICT_SET_FAILED,
+ "key=%s", conf->mds_xattr_key, "path=%s", local->loc.path,
+ NULL);
+ goto out;
+ }
+ ret = syncop_setxattr(mds_subvol, &local->loc, internal_xattr, 0, NULL,
+ NULL);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_DIR_XATTR_HEAL_FAILED, "path=%s", local->loc.path,
+ "subvol=%s", mds_subvol->name, "gfid=%s", gfid, NULL);
+ }
+ }
- return 0;
+out:
+ if (user_xattr)
+ dict_unref(user_xattr);
+ if (mds_xattr)
+ dict_unref(mds_xattr);
+ if (internal_xattr)
+ dict_unref(internal_xattr);
+ if (xdata)
+ dict_unref(xdata);
+ return 0;
}
+int
+dht_dir_heal_xattrs_done(int ret, call_frame_t *sync_frame, void *data)
+{
+ DHT_STACK_DESTROY(sync_frame);
+ return 0;
+}
-void
-dht_selfheal_layout_new_directory (call_frame_t *frame, loc_t *loc,
- dht_layout_t *layout)
+int
+dht_dir_attr_heal(void *data)
{
- xlator_t *this = NULL;
- uint32_t chunk = 0;
- int i = 0;
- uint32_t start = 0;
- int bricks_to_use = 0;
- int err = 0;
- int start_subvol = 0;
- uint32_t curr_size;
- uint32_t total_size = 0;
- int real_i;
- dht_conf_t *priv;
- gf_boolean_t weight_by_size;
- int bricks_used = 0;
-
- this = frame->this;
- priv = this->private;
- weight_by_size = priv->do_weighting;
-
- bricks_to_use = dht_get_layout_count (this, layout, 1);
- GF_ASSERT (bricks_to_use > 0);
-
- bricks_used = 0;
- for (i = 0; i < layout->cnt; ++i) {
- err = layout->list[i].err;
- if ((err != -1) && (err != ENOENT)) {
- continue;
- }
- curr_size = dht_get_chunks_from_xl (this,
- layout->list[i].xlator);
- if (!curr_size) {
- weight_by_size = _gf_false;
- break;
- }
- total_size += curr_size;
- if (++bricks_used >= bricks_to_use) {
- break;
+ call_frame_t *frame = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ xlator_t *mds_subvol = NULL;
+ xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+ int call_cnt = 0;
+ int ret = -1;
+ int i = 0;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ GF_VALIDATE_OR_GOTO("dht", data, out);
+
+ frame = data;
+ local = frame->local;
+ this = frame->this;
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO("dht", local, out);
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO("dht", conf, out);
+
+ mds_subvol = local->mds_subvol;
+ call_cnt = conf->subvolume_cnt;
+
+ if (!__is_root_gfid(local->stbuf.ia_gfid) && (!mds_subvol)) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_NO_MDS_SUBVOL, "path=%s",
+ local->loc.path, "gfid=%s", gfid, NULL);
+ goto out;
+ }
+
+ if (!__is_root_gfid(local->stbuf.ia_gfid)) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvolumes[i] == mds_subvol) {
+ if (!conf->subvolume_status[i]) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_MDS_DOWN_UNABLE_TO_SET, "path=%s",
+ local->loc.path, "gfid=%s", gfid, NULL);
+ goto out;
}
+ }
+ }
+ }
+
+ for (i = 0; i < call_cnt; i++) {
+ subvol = conf->subvolumes[i];
+ if (!subvol || subvol == mds_subvol)
+ continue;
+ if (__is_root_gfid(local->stbuf.ia_gfid)) {
+ ret = syncop_setattr(
+ subvol, &local->loc, &local->stbuf,
+ (GF_SET_ATTR_UID | GF_SET_ATTR_GID | GF_SET_ATTR_MODE), NULL,
+ NULL, NULL, NULL);
+ } else {
+ ret = syncop_setattr(
+ subvol, &local->loc, &local->mds_stbuf,
+ (GF_SET_ATTR_UID | GF_SET_ATTR_GID | GF_SET_ATTR_MODE), NULL,
+ NULL, NULL, NULL);
}
- if (weight_by_size && total_size) {
- /* We know total_size is not zero. */
- chunk = ((unsigned long) 0xffffffff) / total_size;
- gf_log (this->name, GF_LOG_INFO,
- "chunk size = 0xffffffff / %u = 0x%x",
- total_size, chunk);
- }
- else {
- weight_by_size = _gf_false;
- chunk = ((unsigned long) 0xffffffff) / bricks_to_use;
- }
-
- start_subvol = dht_selfheal_layout_alloc_start (this, loc, layout);
-
- /* clear out the range, as we are re-computing here */
- DHT_RESET_LAYOUT_RANGE (layout);
+ if (ret) {
+ gf_uuid_unparse(local->loc.gfid, gfid);
- /*
- * OK, what's this "real_i" stuff about? This used to be two loops -
- * from start_subvol to layout->cnt-1, then from 0 to start_subvol-1.
- * That way is practically an open invitation to bugs when only one
- * of the loops is updated. Using real_i and modulo operators to make
- * it one loop avoids this problem. Remember, folks: it's everyone's
- * responsibility to help stamp out copy/paste abuse.
- */
- bricks_used = 0;
- for (real_i = 0; real_i < layout->cnt; real_i++) {
- i = (real_i + start_subvol) % layout->cnt;
- err = layout->list[i].err;
- if ((err != -1) && (err != ENOENT)) {
- continue;
- }
- if (weight_by_size) {
- curr_size = dht_get_chunks_from_xl (this,
- layout->list[i].xlator);
- if (!curr_size) {
- continue;
- }
- }
- else {
- curr_size = 1;
- }
- gf_log (this->name, GF_LOG_INFO,
- "assigning range size 0x%x to %s", chunk * curr_size,
- layout->list[i].xlator->name);
- DHT_SET_LAYOUT_RANGE(layout, i, start, chunk * curr_size,
- loc->path);
- if (++bricks_used >= bricks_to_use) {
- layout->list[i].stop = 0xffffffff;
- goto done;
- }
- start += (chunk * curr_size);
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_DIR_ATTR_HEAL_FAILED, "path=%s", local->loc.path,
+ "subvol=%s", subvol->name, "gfid=%s", gfid, NULL);
}
-
-done:
- return;
+ }
+out:
+ return 0;
}
int
-dht_selfheal_dir_getafix (call_frame_t *frame, loc_t *loc,
- dht_layout_t *layout)
+dht_dir_attr_heal_done(int ret, call_frame_t *sync_frame, void *data)
{
- dht_local_t *local = NULL;
- uint32_t holes = 0;
- int ret = -1;
- int i = -1;
- uint32_t overlaps = 0;
+ DHT_STACK_DESTROY(sync_frame);
+ return 0;
+}
- local = frame->local;
+/* EXIT: dht_update_commit_hash_for_layout */
+static int
+dht_update_commit_hash_for_layout_done(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
- holes = local->selfheal.hole_cnt;
- overlaps = local->selfheal.overlaps_cnt;
+ local = frame->local;
- if (holes || overlaps) {
- dht_selfheal_layout_new_directory (frame, loc, layout);
- ret = 0;
- }
+ /* preserve oldest error */
+ if (op_ret && !local->op_ret) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ }
- for (i = 0; i < layout->cnt; i++) {
- /* directory not present */
- if (layout->list[i].err == ENOENT) {
- ret = 0;
- break;
- }
- }
-
- /* TODO: give a fix to these non-virgins */
+ DHT_STACK_UNWIND(setxattr, frame, local->op_ret, local->op_errno, NULL);
- return ret;
+ return 0;
}
-int
-dht_selfheal_new_directory (call_frame_t *frame,
- dht_selfheal_dir_cbk_t dir_cbk,
- dht_layout_t *layout)
+static int
+dht_update_commit_hash_for_layout_unlock(call_frame_t *frame, xlator_t *this)
{
- dht_local_t *local = NULL;
- int ret = 0;
-
- local = frame->local;
+ dht_local_t *local = NULL;
+ int ret = 0;
- local->selfheal.dir_cbk = dir_cbk;
- local->selfheal.layout = dht_layout_ref (frame->this, layout);
+ local = frame->local;
- dht_layout_sort_volname (layout);
- dht_selfheal_layout_new_directory (frame, &local->loc, layout);
+ ret = dht_unlock_inodelk(frame, local->lock[0].layout.my_layout.locks,
+ local->lock[0].layout.my_layout.lk_count,
+ dht_update_commit_hash_for_layout_done);
+ if (ret < 0) {
+ /* preserve oldest error, just ... */
+ if (!local->op_ret) {
+ local->op_errno = errno;
+ local->op_ret = -1;
+ }
- ret = dht_selfheal_layout_lock (frame, layout, _gf_true,
- dht_selfheal_dir_xattr,
- dht_should_heal_layout);
+ gf_smsg(this->name, GF_LOG_WARNING, errno, DHT_MSG_WIND_UNLOCK_FAILED,
+ "path=%s", local->loc.path, NULL);
- if (ret < 0) {
- dir_cbk (frame, NULL, frame->this, -1, ENOMEM, NULL);
- }
+ dht_update_commit_hash_for_layout_done(frame, NULL, this, 0, 0, NULL);
+ }
- return 0;
+ return 0;
}
-int
-dht_fix_directory_layout (call_frame_t *frame,
- dht_selfheal_dir_cbk_t dir_cbk,
- dht_layout_t *layout)
+static int
+dht_update_commit_hash_for_layout_cbk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- dht_layout_t *tmp_layout = NULL;
- int ret = 0;
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
- local = frame->local;
+ local = frame->local;
- local->selfheal.dir_cbk = dir_cbk;
- local->selfheal.layout = dht_layout_ref (frame->this, layout);
+ LOCK(&frame->lock);
+ /* store first failure, just because */
+ if (op_ret && !local->op_ret) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ }
+ UNLOCK(&frame->lock);
- /* No layout sorting required here */
- tmp_layout = dht_fix_layout_of_directory (frame, &local->loc, layout);
- if (!tmp_layout) {
- return -1;
- }
+ this_call_cnt = dht_frame_return(frame);
- ret = dht_selfheal_layout_lock (frame, tmp_layout, _gf_false,
- dht_fix_dir_xattr,
- dht_should_fix_layout);
+ if (is_last_call(this_call_cnt)) {
+ dht_update_commit_hash_for_layout_unlock(frame, this);
+ }
- return ret;
+ return 0;
}
-
-int
-dht_selfheal_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
- loc_t *loc, dht_layout_t *layout)
+static int
+dht_update_commit_hash_for_layout_resume(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
{
- dht_local_t *local = NULL;
- uint32_t down = 0;
- uint32_t misc = 0;
- int ret = 0;
- xlator_t *this = NULL;
- char gfid[GF_UUID_BUF_SIZE] = {0};
-
- local = frame->local;
- this = frame->this;
-
- gf_uuid_unparse(loc->gfid, gfid);
-
- dht_layout_anomalies (this, loc, layout,
- &local->selfheal.hole_cnt,
- &local->selfheal.overlaps_cnt,
- NULL, &local->selfheal.down,
- &local->selfheal.misc, NULL);
-
- down = local->selfheal.down;
- misc = local->selfheal.misc;
-
- local->selfheal.dir_cbk = dir_cbk;
- local->selfheal.layout = dht_layout_ref (this, layout);
-
- if (down) {
- gf_msg (this->name, GF_LOG_WARNING, 0,
- DHT_MSG_DIR_SELFHEAL_FAILED,
- "Directory selfheal failed: %d subvolumes down."
- "Not fixing. path = %s, gfid = %s",
- down, loc->path, gfid);
- ret = 0;
- goto sorry_no_fix;
- }
+ dht_local_t *local = NULL;
+ int count = 1, ret = -1, i = 0, j = 0;
+ dht_conf_t *conf = NULL;
+ dht_layout_t *layout = NULL;
+ int32_t *disk_layout = NULL;
+ dict_t **xattr = NULL;
+
+ local = frame->local;
+ conf = frame->this->private;
+ count = conf->local_subvols_cnt;
+ layout = local->layout;
+
+ if (op_ret < 0) {
+ goto err_done;
+ }
+
+ /* We precreate the xattr list as we cannot change call count post the
+ * first wind as we may never continue from there. So we finish prep
+ * work before winding the setxattrs */
+ xattr = GF_CALLOC(count, sizeof(*xattr), gf_common_mt_char);
+ if (!xattr) {
+ local->op_errno = errno;
+
+ gf_smsg(this->name, GF_LOG_WARNING, errno, DHT_MSG_COMMIT_HASH_FAILED,
+ "allocation-failed path=%s", local->loc.path, NULL);
+
+ goto err;
+ }
+
+ for (i = 0; i < count; i++) {
+ /* find the layout index for the subvolume */
+ ret = dht_layout_index_for_subvol(layout, conf->local_subvols[i]);
+ if (ret < 0) {
+ local->op_errno = ENOENT;
- if (misc) {
- gf_msg (this->name, GF_LOG_WARNING, 0,
- DHT_MSG_DIR_SELFHEAL_FAILED,
- "Directory selfheal failed : %d subvolumes "
- "have unrecoverable errors. path = %s, gfid = %s",
- misc, loc->path, gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_COMMIT_HASH_FAILED,
+ "path=%s", local->loc.path, "subvol=%s",
+ conf->local_subvols[i]->name, "find-disk-layout-failed",
+ NULL);
- ret = 0;
- goto sorry_no_fix;
+ goto err;
}
+ j = ret;
- dht_layout_sort_volname (layout);
- ret = dht_selfheal_dir_getafix (frame, loc, layout);
+ /* update the commit hash for the layout */
+ layout->list[j].commit_hash = layout->commit_hash;
+ /* extract the current layout */
+ ret = dht_disk_layout_extract(this, layout, j, &disk_layout);
if (ret == -1) {
- gf_msg (this->name, GF_LOG_WARNING, 0,
- DHT_MSG_DIR_SELFHEAL_FAILED,
- "Directory selfheal failed: "
- "Unable to form layout for directory %s",
- loc->path);
- goto sorry_no_fix;
- }
+ local->op_errno = errno;
- dht_selfheal_dir_mkdir (frame, loc, layout, 0);
+ gf_smsg(this->name, GF_LOG_WARNING, errno,
+ DHT_MSG_COMMIT_HASH_FAILED, "path=%s", local->loc.path,
+ "subvol=%s", conf->local_subvols[i]->name,
+ "extract-disk-layout-failed", NULL);
- return 0;
+ goto err;
+ }
-sorry_no_fix:
- /* TODO: need to put appropriate local->op_errno */
- dht_selfheal_dir_finish (frame, this, ret);
+ xattr[i] = dict_new();
+ if (!xattr[i]) {
+ local->op_errno = errno;
- return 0;
-}
+ gf_smsg(this->name, GF_LOG_WARNING, errno,
+ DHT_MSG_COMMIT_HASH_FAILED, "path=%s Allocation-failed",
+ local->loc.path, NULL);
-int
-dht_selfheal_directory_for_nameless_lookup (call_frame_t *frame,
- dht_selfheal_dir_cbk_t dir_cbk,
- loc_t *loc, dht_layout_t *layout)
-{
- dht_local_t *local = NULL;
- uint32_t down = 0;
- uint32_t misc = 0;
- int ret = 0;
- xlator_t *this = NULL;
-
- local = frame->local;
- this = frame->this;
- dht_layout_anomalies (this, loc, layout,
- &local->selfheal.hole_cnt,
- &local->selfheal.overlaps_cnt,
- NULL, &local->selfheal.down,
- &local->selfheal.misc, NULL);
-
- down = local->selfheal.down;
- misc = local->selfheal.misc;
-
- local->selfheal.dir_cbk = dir_cbk;
- local->selfheal.layout = dht_layout_ref (this, layout);
-
- if (down) {
- gf_log (this->name, GF_LOG_WARNING,
- "%d subvolumes down -- not fixing", down);
- ret = 0;
- goto sorry_no_fix;
+ goto err;
}
- if (misc) {
- gf_log (this->name, GF_LOG_WARNING,
- "%d subvolumes have unrecoverable errors", misc);
- ret = 0;
- goto sorry_no_fix;
- }
+ ret = dict_set_bin(xattr[i], conf->xattr_name, disk_layout, 4 * 4);
+ if (ret != 0) {
+ local->op_errno = ENOMEM;
- dht_layout_sort_volname (layout);
- ret = dht_selfheal_dir_getafix (frame, loc, layout);
+ gf_smsg(this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_DIR_SELFHEAL_XATTR_FAILED, "path=%s",
+ local->loc.path, "subvol=%s", conf->local_subvols[i]->name,
+ "set-xattr-failed", NULL);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "not able to form layout for the directory");
- goto sorry_no_fix;
+ goto err;
}
+ disk_layout = NULL;
- ret = dht_selfheal_layout_lock (frame, layout, _gf_false,
- dht_selfheal_dir_xattr_for_nameless_lookup,
- dht_should_heal_layout);
-
- if (ret < 0) {
- goto sorry_no_fix;
+ gf_msg_trace(this->name, 0,
+ "setting commit hash %u on subvolume %s"
+ " for %s",
+ layout->list[j].commit_hash, conf->local_subvols[i]->name,
+ local->loc.path);
+ }
+
+ /* wind the setting of the commit hash across the local subvols */
+ local->call_cnt = count;
+ local->op_ret = 0;
+ local->op_errno = 0;
+ for (i = 0; i < count; i++) {
+ STACK_WIND(frame, dht_update_commit_hash_for_layout_cbk,
+ conf->local_subvols[i],
+ conf->local_subvols[i]->fops->setxattr, &local->loc,
+ xattr[i], 0, NULL);
+ }
+ for (i = 0; i < count; i++)
+ dict_unref(xattr[i]);
+ GF_FREE(xattr);
+
+ return 0;
+err:
+ if (xattr) {
+ for (i = 0; i < count; i++) {
+ if (xattr[i])
+ dict_unref(xattr[i]);
}
- return 0;
+ GF_FREE(xattr);
+ }
-sorry_no_fix:
- /* TODO: need to put appropriate local->op_errno */
- dht_selfheal_dir_finish (frame, this, ret);
+ GF_FREE(disk_layout);
- return 0;
+ local->op_ret = -1;
+ dht_update_commit_hash_for_layout_unlock(frame, this);
-}
-
-int
-dht_selfheal_restore (call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
- loc_t *loc, dht_layout_t *layout)
-{
- int ret = 0;
- dht_local_t *local = NULL;
+ return 0;
+err_done:
+ local->op_ret = -1;
- local = frame->local;
+ dht_update_commit_hash_for_layout_done(frame, NULL, this, 0, 0, NULL);
- local->selfheal.dir_cbk = dir_cbk;
- local->selfheal.layout = dht_layout_ref (frame->this, layout);
-
- ret = dht_selfheal_dir_mkdir (frame, loc, layout, 1);
-
- return ret;
+ return 0;
}
+/* ENTER: dht_update_commit_hash_for_layout (see EXIT above)
+ * This function is invoked from rebalance only.
+ * As a result, the check here is simple enough to see if defrag is present
+ * in the conf, as other data would be populated appropriately if so.
+ * If ever this was to be used in other code paths, checks would need to
+ * change.
+ *
+ * Functional details:
+ * - Lock the inodes on the subvols that we want the commit hash updated
+ * - Update each layout with the inode layout, modified to take in the new
+ * commit hash.
+ * - Unlock and return.
+ */
int
-dht_dir_attr_heal (void *data)
+dht_update_commit_hash_for_layout(call_frame_t *frame)
{
- call_frame_t *frame = NULL;
- dht_local_t *local = NULL;
- xlator_t *subvol = NULL;
- xlator_t *this = NULL;
- dht_conf_t *conf = NULL;
- int call_cnt = 0;
- int ret = -1;
- int i = 0;
- char gfid[GF_UUID_BUF_SIZE] = {0};
-
-
- GF_VALIDATE_OR_GOTO ("dht", data, out);
-
- frame = data;
- local = frame->local;
- this = frame->this;
- GF_VALIDATE_OR_GOTO ("dht", this, out);
- GF_VALIDATE_OR_GOTO ("dht", local, out);
- conf = this->private;
- GF_VALIDATE_OR_GOTO ("dht", conf, out);
-
- call_cnt = conf->subvolume_cnt;
-
- for (i = 0; i < call_cnt; i++) {
- subvol = conf->subvolumes[i];
- if (!subvol || (subvol == dht_first_up_subvol (this)))
- continue;
- ret = syncop_setattr (subvol, &local->loc, &local->stbuf,
- (GF_SET_ATTR_UID | GF_SET_ATTR_GID),
- NULL, NULL, NULL, NULL);
- if (ret) {
- gf_uuid_unparse(local->loc.gfid, gfid);
-
- gf_msg ("dht", GF_LOG_ERROR, -ret,
- DHT_MSG_DIR_ATTR_HEAL_FAILED,
- "Directory attr heal failed. Failed to set"
- " uid/gid on path %s on subvol %s, gfid = %s ",
- local->loc.path, subvol->name, gfid);
- }
- }
-out:
- return 0;
-}
+ dht_local_t *local = NULL;
+ int count = 1, ret = -1, i = 0;
+ dht_lock_t **lk_array = NULL;
+ dht_conf_t *conf = NULL;
+
+ GF_VALIDATE_OR_GOTO("dht", frame, err);
+ GF_VALIDATE_OR_GOTO(frame->this->name, frame->local, err);
+
+ local = frame->local;
+ conf = frame->this->private;
+
+ if (!conf->defrag)
+ goto err;
+
+ count = conf->local_subvols_cnt;
+ lk_array = GF_CALLOC(count, sizeof(*lk_array), gf_common_mt_char);
+ if (lk_array == NULL)
+ goto err;
+
+ for (i = 0; i < count; i++) {
+ lk_array[i] = dht_lock_new(frame->this, conf->local_subvols[i],
+ &local->loc, F_WRLCK, DHT_LAYOUT_HEAL_DOMAIN,
+ NULL, FAIL_ON_ANY_ERROR);
+ if (lk_array[i] == NULL)
+ goto err;
+ }
+
+ local->lock[0].layout.my_layout.locks = lk_array;
+ local->lock[0].layout.my_layout.lk_count = count;
+
+ ret = dht_blocking_inodelk(frame, lk_array, count,
+ dht_update_commit_hash_for_layout_resume);
+ if (ret < 0) {
+ local->lock[0].layout.my_layout.locks = NULL;
+ local->lock[0].layout.my_layout.lk_count = 0;
+ goto err;
+ }
+
+ return 0;
+err:
+ if (lk_array != NULL) {
+ dht_lock_array_free(lk_array, count);
+ GF_FREE(lk_array);
+ }
-int
-dht_dir_attr_heal_done (int ret, call_frame_t *sync_frame, void *data)
-{
- DHT_STACK_DESTROY (sync_frame);
- return 0;
+ return -1;
}
diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c
index 042adf1714b..bb72b0ffbb5 100644
--- a/xlators/cluster/dht/src/dht-shared.c
+++ b/xlators/cluster/dht/src/dht-shared.c
@@ -8,18 +8,14 @@
cases as published by the Free Software Foundation.
*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
/* TODO: add NS locking */
-
-#include "statedump.h"
+#include <glusterfs/statedump.h>
#include "dht-common.h"
#include "dht-messages.h"
+#ifndef MAX
+#define MAX(a, b) (((a) > (b)) ? (a) : (b))
+#endif
/* TODO:
- use volumename in xattr instead of "dht"
@@ -27,874 +23,1082 @@
- handle all cases in self heal layout reconstruction
- complete linkfile selfheal
*/
-struct volume_options options[];
-extern dht_methods_t dht_methods;
-
-void
-dht_layout_dump (dht_layout_t *layout, const char *prefix)
+static void
+dht_layout_dump(dht_layout_t *layout, const char *prefix)
{
-
- char key[GF_DUMP_MAX_BUF_LEN];
- int i = 0;
-
- if (!layout)
- goto out;
- if (!prefix)
- goto out;
-
- gf_proc_dump_build_key(key, prefix, "cnt");
- gf_proc_dump_write(key, "%d", layout->cnt);
- gf_proc_dump_build_key(key, prefix, "preset");
- gf_proc_dump_write(key, "%d", layout->preset);
- gf_proc_dump_build_key(key, prefix, "gen");
- gf_proc_dump_write(key, "%d", layout->gen);
- if (layout->type != IA_INVAL) {
- gf_proc_dump_build_key(key, prefix, "inode type");
- gf_proc_dump_write(key, "%d", layout->type);
- }
-
- if (!IA_ISDIR (layout->type))
- goto out;
-
- for (i = 0; i < layout->cnt; i++) {
- gf_proc_dump_build_key(key, prefix,"list[%d].err", i);
- gf_proc_dump_write(key, "%d", layout->list[i].err);
- gf_proc_dump_build_key(key, prefix,"list[%d].start", i);
- gf_proc_dump_write(key, "%u", layout->list[i].start);
- gf_proc_dump_build_key(key, prefix,"list[%d].stop", i);
- gf_proc_dump_write(key, "%u", layout->list[i].stop);
- if (layout->list[i].xlator) {
- gf_proc_dump_build_key(key, prefix,
- "list[%d].xlator.type", i);
- gf_proc_dump_write(key, "%s",
- layout->list[i].xlator->type);
- gf_proc_dump_build_key(key, prefix,
- "list[%d].xlator.name", i);
- gf_proc_dump_write(key, "%s",
- layout->list[i].xlator->name);
- }
+ char key[GF_DUMP_MAX_BUF_LEN];
+ int i = 0;
+
+ if (!layout)
+ goto out;
+
+ gf_proc_dump_build_key(key, prefix, "cnt");
+ gf_proc_dump_write(key, "%d", layout->cnt);
+ gf_proc_dump_build_key(key, prefix, "preset");
+ gf_proc_dump_write(key, "%d", layout->preset);
+ gf_proc_dump_build_key(key, prefix, "gen");
+ gf_proc_dump_write(key, "%d", layout->gen);
+ if (layout->type != IA_INVAL) {
+ gf_proc_dump_build_key(key, prefix, "inode type");
+ gf_proc_dump_write(key, "%d", layout->type);
+ }
+
+ if (!IA_ISDIR(layout->type))
+ goto out;
+
+ for (i = 0; i < layout->cnt; i++) {
+ gf_proc_dump_build_key(key, prefix, "list[%d].err", i);
+ gf_proc_dump_write(key, "%d", layout->list[i].err);
+ gf_proc_dump_build_key(key, prefix, "list[%d].start", i);
+ gf_proc_dump_write(key, "0x%x", layout->list[i].start);
+ gf_proc_dump_build_key(key, prefix, "list[%d].stop", i);
+ gf_proc_dump_write(key, "0x%x", layout->list[i].stop);
+ if (layout->list[i].xlator) {
+ gf_proc_dump_build_key(key, prefix, "list[%d].xlator.type", i);
+ gf_proc_dump_write(key, "%s", layout->list[i].xlator->type);
+ gf_proc_dump_build_key(key, prefix, "list[%d].xlator.name", i);
+ gf_proc_dump_write(key, "%s", layout->list[i].xlator->name);
}
+ }
out:
- return;
+ return;
}
-
int32_t
-dht_priv_dump (xlator_t *this)
+dht_priv_dump(xlator_t *this)
{
- char key_prefix[GF_DUMP_MAX_BUF_LEN];
- char key[GF_DUMP_MAX_BUF_LEN];
- int i = 0;
- dht_conf_t *conf = NULL;
- int ret = -1;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN];
+ char key[GF_DUMP_MAX_BUF_LEN];
+ int i = 0;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
- if (!this)
- goto out;
+ if (!this)
+ goto out;
- conf = this->private;
- if (!conf)
- goto out;
+ conf = this->private;
+ if (!conf)
+ goto out;
- ret = TRY_LOCK(&conf->subvolume_lock);
- if (ret != 0) {
- return ret;
+ ret = TRY_LOCK(&conf->subvolume_lock);
+ if (ret != 0) {
+ return ret;
+ }
+
+ gf_proc_dump_add_section("xlator.cluster.dht.%s.priv", this->name);
+ gf_proc_dump_build_key(key_prefix, "xlator.cluster.dht", "%s.priv",
+ this->name);
+ gf_proc_dump_write("subvol_cnt", "%d", conf->subvolume_cnt);
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ snprintf(key, sizeof(key), "subvolumes[%d]", i);
+ gf_proc_dump_write(key, "%s.%s", conf->subvolumes[i]->type,
+ conf->subvolumes[i]->name);
+ if (conf->file_layouts && conf->file_layouts[i]) {
+ snprintf(key, sizeof(key), "file_layouts[%d]", i);
+ dht_layout_dump(conf->file_layouts[i], key);
}
-
- gf_proc_dump_add_section("xlator.cluster.dht.%s.priv", this->name);
- gf_proc_dump_build_key(key_prefix,"xlator.cluster.dht","%s.priv",
- this->name);
- gf_proc_dump_write("subvol_cnt","%d", conf->subvolume_cnt);
+ if (conf->dir_layouts && conf->dir_layouts[i]) {
+ snprintf(key, sizeof(key), "dir_layouts[%d]", i);
+ dht_layout_dump(conf->dir_layouts[i], key);
+ }
+ if (conf->subvolume_status) {
+ snprintf(key, sizeof(key), "subvolume_status[%d]", i);
+ gf_proc_dump_write(key, "%d", (int)conf->subvolume_status[i]);
+ }
+ }
+
+ gf_proc_dump_write("search_unhashed", "%d", conf->search_unhashed);
+ gf_proc_dump_write("gen", "%d", conf->gen);
+ gf_proc_dump_write("min_free_disk", "%lf", conf->min_free_disk);
+ gf_proc_dump_write("min_free_inodes", "%lf", conf->min_free_inodes);
+ gf_proc_dump_write("disk_unit", "%c", conf->disk_unit);
+ gf_proc_dump_write("refresh_interval", "%d", conf->refresh_interval);
+ gf_proc_dump_write("unhashed_sticky_bit", "%d", conf->unhashed_sticky_bit);
+ gf_proc_dump_write("use-readdirp", "%d", conf->use_readdirp);
+
+ if (conf->du_stats && conf->subvolume_status) {
for (i = 0; i < conf->subvolume_cnt; i++) {
- snprintf (key, sizeof (key), "subvolumes[%d]", i);
- gf_proc_dump_write(key, "%s.%s", conf->subvolumes[i]->type,
- conf->subvolumes[i]->name);
- if (conf->file_layouts && conf->file_layouts[i]){
- snprintf (key, sizeof (key), "file_layouts[%d]", i);
- dht_layout_dump(conf->file_layouts[i], key);
- }
- if (conf->dir_layouts && conf->dir_layouts[i]) {
- snprintf (key, sizeof (key), "dir_layouts[%d]", i);
- dht_layout_dump(conf->dir_layouts[i], key);
- }
- if (conf->subvolume_status) {
-
- snprintf (key, sizeof (key), "subvolume_status[%d]", i);
- gf_proc_dump_write(key, "%d",
- (int)conf->subvolume_status[i]);
- }
+ if (!conf->subvolume_status[i])
+ continue;
- }
+ snprintf(key, sizeof(key), "subvolumes[%d]", i);
+ gf_proc_dump_write(key, "%s", conf->subvolumes[i]->name);
+
+ snprintf(key, sizeof(key), "du_stats[%d].avail_percent", i);
+ gf_proc_dump_write(key, "%lf", conf->du_stats[i].avail_percent);
- gf_proc_dump_write("search_unhashed", "%d", conf->search_unhashed);
- gf_proc_dump_write("gen", "%d", conf->gen);
- gf_proc_dump_write("min_free_disk", "%lf", conf->min_free_disk);
- gf_proc_dump_write("min_free_inodes", "%lf", conf->min_free_inodes);
- gf_proc_dump_write("disk_unit", "%c", conf->disk_unit);
- gf_proc_dump_write("refresh_interval", "%d", conf->refresh_interval);
- gf_proc_dump_write("unhashed_sticky_bit", "%d", conf->unhashed_sticky_bit);
-
- if (conf->du_stats && conf->subvolume_status) {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (!conf->subvolume_status[i])
- continue;
-
- snprintf (key, sizeof (key), "subvolumes[%d]", i);
- gf_proc_dump_write (key, "%s",
- conf->subvolumes[i]->name);
-
- snprintf (key, sizeof (key),
- "du_stats[%d].avail_percent", i);
- gf_proc_dump_write (key, "%lf",
- conf->du_stats[i].avail_percent);
-
- snprintf (key, sizeof (key), "du_stats[%d].avail_space",
- i);
- gf_proc_dump_write (key, "%lu",
- conf->du_stats[i].avail_space);
-
- snprintf (key, sizeof (key),
- "du_stats[%d].avail_inodes", i);
- gf_proc_dump_write (key, "%lf",
- conf->du_stats[i].avail_inodes);
-
- snprintf (key, sizeof (key), "du_stats[%d].log", i);
- gf_proc_dump_write (key, "%lu",
- conf->du_stats[i].log);
- }
+ snprintf(key, sizeof(key), "du_stats[%d].avail_space", i);
+ gf_proc_dump_write(key, "%" PRIu64, conf->du_stats[i].avail_space);
+
+ snprintf(key, sizeof(key), "du_stats[%d].avail_inodes", i);
+ gf_proc_dump_write(key, "%lf", conf->du_stats[i].avail_inodes);
+
+ snprintf(key, sizeof(key), "du_stats[%d].log", i);
+ gf_proc_dump_write(key, "%" PRIu32, conf->du_stats[i].log);
}
+ }
- if (conf->last_stat_fetch.tv_sec)
- gf_proc_dump_write("last_stat_fetch", "%s",
- ctime(&conf->last_stat_fetch.tv_sec));
+ if (conf->last_stat_fetch)
+ gf_proc_dump_write("last_stat_fetch", "%s",
+ ctime(&conf->last_stat_fetch));
- UNLOCK(&conf->subvolume_lock);
+ UNLOCK(&conf->subvolume_lock);
out:
- return ret;
+ return ret;
}
int32_t
-dht_inodectx_dump (xlator_t *this, inode_t *inode)
+dht_inodectx_dump(xlator_t *this, inode_t *inode)
{
- int ret = -1;
- dht_layout_t *layout = NULL;
+ int ret = -1;
+ dht_layout_t *layout = NULL;
- if (!this)
- goto out;
- if (!inode)
- goto out;
+ if (!this)
+ goto out;
+ if (!inode)
+ goto out;
- ret = dht_inode_ctx_layout_get (inode, this, &layout);
+ ret = dht_inode_ctx_layout_get(inode, this, &layout);
- if ((ret != 0) || !layout)
- return ret;
+ if ((ret != 0) || !layout)
+ return ret;
- gf_proc_dump_add_section("xlator.cluster.dht.%s.inode", this->name);
- dht_layout_dump(layout, "layout");
+ gf_proc_dump_add_section("xlator.cluster.dht.%s.inode", this->name);
+ dht_layout_dump(layout, "layout");
out:
- return ret;
+ return ret;
}
void
-dht_fini (xlator_t *this)
+dht_fini(xlator_t *this)
{
- int i = 0;
- dht_conf_t *conf = NULL;
+ int i = 0;
+ dht_conf_t *conf = NULL;
+
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+
+ conf = this->private;
+ this->private = NULL;
+ if (conf) {
+ if (conf->file_layouts) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ GF_FREE(conf->file_layouts[i]);
+ }
+ GF_FREE(conf->file_layouts);
+ }
- GF_VALIDATE_OR_GOTO ("dht", this, out);
+ dict_unref(conf->leaf_to_subvol);
- conf = this->private;
- this->private = NULL;
- if (conf) {
- if (conf->file_layouts) {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- GF_FREE (conf->file_layouts[i]);
- }
- GF_FREE (conf->file_layouts);
- }
+ /* allocated in dht_init_subvolumes() */
+ GF_FREE(conf->subvolumes);
+ GF_FREE(conf->subvolume_status);
+ GF_FREE(conf->last_event);
+ GF_FREE(conf->subvol_up_time);
+ GF_FREE(conf->du_stats);
+ GF_FREE(conf->decommissioned_bricks);
- dict_destroy(conf->leaf_to_subvol);
+ /* allocated in dht_init() */
+ GF_FREE(conf->mds_xattr_key);
+ GF_FREE(conf->link_xattr_name);
+ GF_FREE(conf->commithash_xattr_name);
+ GF_FREE(conf->wild_xattr_name);
- GF_FREE (conf->subvolumes);
+ /* allocated in dht_init_regex() */
+ if (conf->rsync_regex_valid)
+ regfree(&conf->rsync_regex);
+ if (conf->extra_regex_valid)
+ regfree(&conf->extra_regex);
- GF_FREE (conf->subvolume_status);
+ synclock_destroy(&conf->link_lock);
- if (conf->lock_pool)
- mem_pool_destroy (conf->lock_pool);
+ if (conf->lock_pool)
+ mem_pool_destroy(conf->lock_pool);
- GF_FREE (conf);
- }
+ GF_FREE(conf);
+ }
out:
- return;
+ return;
}
int32_t
-mem_acct_init (xlator_t *this)
+mem_acct_init(xlator_t *this)
{
- int ret = -1;
+ int ret = -1;
- GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO("dht", this, out);
- ret = xlator_mem_acct_init (this, gf_dht_mt_end + 1);
+ ret = xlator_mem_acct_init(this, gf_dht_mt_end + 1);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
- "failed");
- return ret;
- }
-out:
+ if (ret != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_NO_MEMORY,
+ "Memory accounting init failed");
return ret;
+ }
+out:
+ return ret;
}
-
-int
-dht_parse_decommissioned_bricks (xlator_t *this, dht_conf_t *conf,
- const char *bricks)
+static int
+dht_parse_decommissioned_bricks(xlator_t *this, dht_conf_t *conf,
+ const char *bricks)
{
- int i = 0;
- int ret = -1;
- char *tmpstr = NULL;
- char *dup_brick = NULL;
- char *node = NULL;
-
- if (!conf || !bricks)
- goto out;
-
- dup_brick = gf_strdup (bricks);
- node = strtok_r (dup_brick, ",", &tmpstr);
- while (node) {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (!strcmp (conf->subvolumes[i]->name, node)) {
- conf->decommissioned_bricks[i] =
- conf->subvolumes[i];
- conf->decommission_subvols_cnt++;
- gf_log (this->name, GF_LOG_INFO,
- "decommissioning subvolume %s",
- conf->subvolumes[i]->name);
- break;
- }
- }
- if (i == conf->subvolume_cnt) {
- /* Wrong node given. */
- goto out;
- }
- node = strtok_r (NULL, ",", &tmpstr);
+ int i = 0;
+ int ret = -1;
+ char *tmpstr = NULL;
+ char *dup_brick = NULL;
+ char *node = NULL;
+
+ if (!conf || !bricks)
+ goto out;
+
+ dup_brick = gf_strdup(bricks);
+ if (dup_brick == NULL) {
+ goto out;
+ }
+
+ node = strtok_r(dup_brick, ",", &tmpstr);
+ while (node) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (!strcmp(conf->subvolumes[i]->name, node)) {
+ conf->decommissioned_bricks[i] = conf->subvolumes[i];
+ conf->decommission_subvols_cnt++;
+ gf_msg(this->name, GF_LOG_INFO, 0,
+ DHT_MSG_SUBVOL_DECOMMISSION_INFO,
+ "decommissioning subvolume %s",
+ conf->subvolumes[i]->name);
+ break;
+ }
}
+ if (i == conf->subvolume_cnt) {
+ /* Wrong node given. */
+ goto out;
+ }
+ node = strtok_r(NULL, ",", &tmpstr);
+ }
- ret = 0;
- conf->decommission_in_progress = 1;
+ ret = 0;
+ conf->decommission_in_progress = 1;
out:
- GF_FREE (dup_brick);
+ GF_FREE(dup_brick);
- return ret;
+ return ret;
}
-int
-dht_decommissioned_remove (xlator_t *this, dht_conf_t *conf)
+static void
+dht_decommissioned_remove(xlator_t *this, dht_conf_t *conf)
{
- int i = 0;
- int ret = -1;
-
- if (!conf)
- goto out;
+ int i = 0;
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (conf->decommissioned_bricks[i]) {
- conf->decommissioned_bricks[i] = NULL;
- conf->decommission_subvols_cnt--;
- }
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->decommissioned_bricks[i]) {
+ conf->decommissioned_bricks[i] = NULL;
+ conf->decommission_subvols_cnt--;
}
-
- ret = 0;
-out:
-
- return ret;
+ }
}
-void
-dht_init_regex (xlator_t *this, dict_t *odict, char *name,
- regex_t *re, gf_boolean_t *re_valid)
+
+static void
+dht_init_regex(xlator_t *this, dict_t *odict, char *name, regex_t *re,
+ gf_boolean_t *re_valid, dht_conf_t *conf)
{
- char *temp_str;
+ char *temp_str = NULL;
- if (dict_get_str (odict, name, &temp_str) != 0) {
- if (strcmp(name,"rsync-hash-regex")) {
- return;
- }
- temp_str = "^\\.(.+)\\.[^.]+$";
+ if (dict_get_str(odict, name, &temp_str) != 0) {
+ if (strcmp(name, "rsync-hash-regex")) {
+ return;
}
+ temp_str = "^\\.(.+)\\.[^.]+$";
+ }
+ LOCK(&conf->lock);
+ {
if (*re_valid) {
- regfree(re);
- *re_valid = _gf_false;
+ regfree(re);
+ *re_valid = _gf_false;
}
- if (!strcmp(temp_str,"none")) {
- return;
+ if (!strcmp(temp_str, "none")) {
+ goto unlock;
}
- if (regcomp(re,temp_str,REG_EXTENDED) == 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "using regex %s = %s", name, temp_str);
- *re_valid = _gf_true;
- }
- else {
- gf_log (this->name, GF_LOG_WARNING,
- "compiling regex %s failed", temp_str);
+ if (regcomp(re, temp_str, REG_EXTENDED) == 0) {
+ gf_msg_debug(this->name, 0, "using regex %s = %s", name, temp_str);
+ *re_valid = _gf_true;
+ } else {
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_REGEX_INFO,
+ "compiling regex %s failed", temp_str);
}
+ }
+unlock:
+ UNLOCK(&conf->lock);
}
int
dht_set_subvol_range(xlator_t *this)
{
- int ret = -1;
- dht_conf_t *conf = NULL;
+ int ret = -1;
+ dht_conf_t *conf = NULL;
- conf = this->private;
+ conf = this->private;
- if (!conf)
- goto out;
+ if (!conf)
+ goto out;
- conf->leaf_to_subvol = dict_new();
- if (!conf->leaf_to_subvol)
- goto out;
+ conf->leaf_to_subvol = dict_new();
+ if (!conf->leaf_to_subvol)
+ goto out;
- ret = glusterfs_reachable_leaves(this, conf->leaf_to_subvol);
+ ret = glusterfs_reachable_leaves(this, conf->leaf_to_subvol);
out:
- return ret;
+ return ret;
+}
+
+static int
+dht_configure_throttle(xlator_t *this, dht_conf_t *conf, char *temp_str)
+{
+ int rebal_thread_count = 0;
+ int ret = 0;
+
+ pthread_mutex_lock(&conf->defrag->dfq_mutex);
+ {
+ if (!strcasecmp(temp_str, "lazy")) {
+ conf->defrag->recon_thread_count = 1;
+ } else if (!strcasecmp(temp_str, "normal")) {
+ conf->defrag->recon_thread_count = 2;
+ } else if (!strcasecmp(temp_str, "aggressive")) {
+ conf->defrag->recon_thread_count = MAX(MAX_REBAL_THREADS - 4, 4);
+ } else if ((gf_string2int(temp_str, &rebal_thread_count) == 0)) {
+ if ((rebal_thread_count > 0) &&
+ (rebal_thread_count <= MAX_REBAL_THREADS)) {
+ conf->defrag->recon_thread_count = rebal_thread_count;
+ pthread_mutex_unlock(&conf->defrag->dfq_mutex);
+ gf_msg(this->name, GF_LOG_INFO, 0, 0,
+ "rebal thread count configured to %d",
+ rebal_thread_count);
+ goto out;
+ } else {
+ pthread_mutex_unlock(&conf->defrag->dfq_mutex);
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INVALID_OPTION,
+ "Invalid option: Reconfigure: "
+ "rebal-throttle should be "
+ "within range of 0 and maximum number of"
+ " cores available");
+ ret = -1;
+ goto out;
+ }
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INVALID_OPTION,
+ "Invalid option: Reconfigure: "
+ "rebal-throttle should be {lazy|normal|aggressive}"
+ " or a number up to the number of cores available,"
+ " not (%s), defaulting to (%d)",
+ temp_str, conf->dthrottle);
+ ret = -1;
+ }
+ }
+ pthread_mutex_unlock(&conf->defrag->dfq_mutex);
+
+out:
+ return ret;
}
int
-dht_reconfigure (xlator_t *this, dict_t *options)
+dht_reconfigure(xlator_t *this, dict_t *options)
{
- dht_conf_t *conf = NULL;
- char *temp_str = NULL;
- gf_boolean_t search_unhashed;
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO ("dht", this, out);
- GF_VALIDATE_OR_GOTO ("dht", options, out);
-
- conf = this->private;
- if (!conf)
- return 0;
-
- if (dict_get_str (options, "lookup-unhashed", &temp_str) == 0) {
- /* If option is not "auto", other options _should_ be boolean*/
- if (strcasecmp (temp_str, "auto")) {
- if (!gf_string2boolean (temp_str, &search_unhashed)) {
- gf_msg_debug(this->name, 0, "Reconfigure: "
- "lookup-unhashed reconfigured(%s)",
- temp_str);
- conf->search_unhashed = search_unhashed;
- } else {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- DHT_MSG_INVALID_OPTION,
- "Invalid option: Reconfigure: "
- "lookup-unhashed should be boolean,"
- " not (%s), defaulting to (%d)",
- temp_str, conf->search_unhashed);
- ret = -1;
- goto out;
- }
- } else {
- gf_msg_debug(this->name, 0, "Reconfigure:"
- " lookup-unhashed reconfigured auto ");
- conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_AUTO;
- }
+ dht_conf_t *conf = NULL;
+ char *temp_str = NULL;
+ gf_boolean_t search_unhashed;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO("dht", options, out);
+
+ conf = this->private;
+ if (!conf)
+ return 0;
+
+ if (dict_get_str(options, "lookup-unhashed", &temp_str) == 0) {
+ /* If option is not "auto", other options _should_ be boolean*/
+ if (strcasecmp(temp_str, "auto")) {
+ if (!gf_string2boolean(temp_str, &search_unhashed)) {
+ gf_msg_debug(this->name, 0,
+ "Reconfigure: "
+ "lookup-unhashed reconfigured(%s)",
+ temp_str);
+ conf->search_unhashed = search_unhashed;
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INVALID_OPTION,
+ "Invalid option: Reconfigure: "
+ "lookup-unhashed should be boolean,"
+ " not (%s), defaulting to (%d)",
+ temp_str, conf->search_unhashed);
+ ret = -1;
+ goto out;
+ }
+ } else {
+ gf_msg_debug(this->name, 0,
+ "Reconfigure:"
+ " lookup-unhashed reconfigured auto ");
+ conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_AUTO;
}
+ }
+
+ GF_OPTION_RECONF("lookup-optimize", conf->lookup_optimize, options, bool,
+ out);
+
+ GF_OPTION_RECONF("min-free-disk", conf->min_free_disk, options,
+ percent_or_size, out);
+ /* option can be any one of percent or bytes */
+ conf->disk_unit = 0;
+ if (conf->min_free_disk < 100.0)
+ conf->disk_unit = 'p';
- GF_OPTION_RECONF ("min-free-disk", conf->min_free_disk, options,
- percent_or_size, out);
- /* option can be any one of percent or bytes */
- conf->disk_unit = 0;
- if (conf->min_free_disk < 100.0)
- conf->disk_unit = 'p';
+ GF_OPTION_RECONF("min-free-inodes", conf->min_free_inodes, options, percent,
+ out);
- GF_OPTION_RECONF ("min-free-inodes", conf->min_free_inodes, options,
- percent, out);
+ GF_OPTION_RECONF("directory-layout-spread", conf->dir_spread_cnt, options,
+ uint32, out);
- GF_OPTION_RECONF ("directory-layout-spread", conf->dir_spread_cnt,
- options, uint32, out);
+ GF_OPTION_RECONF("readdir-optimize", conf->readdir_optimize, options, bool,
+ out);
+ GF_OPTION_RECONF("randomize-hash-range-by-gfid", conf->randomize_by_gfid,
+ options, bool, out);
- GF_OPTION_RECONF ("readdir-optimize", conf->readdir_optimize, options,
- bool, out);
- GF_OPTION_RECONF ("randomize-hash-range-by-gfid",
- conf->randomize_by_gfid,
- options, bool, out);
+ GF_OPTION_RECONF("lock-migration", conf->lock_migration_enabled, options,
+ bool, out);
- if (conf->defrag) {
- GF_OPTION_RECONF ("rebalance-stats", conf->defrag->stats,
- options, bool, out);
+ GF_OPTION_RECONF("force-migration", conf->force_migration, options, bool,
+ out);
+
+ if (conf->defrag) {
+ if (dict_get_str(options, "rebal-throttle", &temp_str) == 0) {
+ ret = dht_configure_throttle(this, conf, temp_str);
+ if (ret == -1)
+ goto out;
}
+ }
+
+ if (conf->defrag) {
+ conf->defrag->lock_migration_enabled = conf->lock_migration_enabled;
+ }
+
+ if (conf->defrag) {
+ GF_OPTION_RECONF("rebalance-stats", conf->defrag->stats, options, bool,
+ out);
+ }
+
+ if (dict_get_str(options, "decommissioned-bricks", &temp_str) == 0) {
+ ret = dht_parse_decommissioned_bricks(this, conf, temp_str);
+ if (ret == -1)
+ goto out;
+ } else {
+ dht_decommissioned_remove(this, conf);
+ }
+
+ dht_init_regex(this, options, "rsync-hash-regex", &conf->rsync_regex,
+ &conf->rsync_regex_valid, conf);
+ dht_init_regex(this, options, "extra-hash-regex", &conf->extra_regex,
+ &conf->extra_regex_valid, conf);
+
+ GF_OPTION_RECONF("weighted-rebalance", conf->do_weighting, options, bool,
+ out);
+
+ GF_OPTION_RECONF("use-readdirp", conf->use_readdirp, options, bool, out);
+ ret = 0;
+out:
+ return ret;
+}
- if (dict_get_str (options, "decommissioned-bricks", &temp_str) == 0) {
- ret = dht_parse_decommissioned_bricks (this, conf, temp_str);
- if (ret == -1)
- goto out;
- } else {
- ret = dht_decommissioned_remove (this, conf);
- if (ret == -1)
- goto out;
+static int
+gf_defrag_pattern_list_fill(xlator_t *this, gf_defrag_info_t *defrag,
+ char *data)
+{
+ int ret = -1;
+ char *tmp_str = NULL;
+ char *tmp_str1 = NULL;
+ char *dup_str = NULL;
+ char *num = NULL;
+ char *pattern_str = NULL;
+ char *pattern = NULL;
+ gf_defrag_pattern_list_t *temp_list = NULL;
+ gf_defrag_pattern_list_t *pattern_list = NULL;
+
+ if (!this || !defrag || !data)
+ goto out;
+
+ /* Get the pattern for pattern list. "pattern:<optional-size>"
+ * eg: *avi, *pdf:10MB, *:1TB
+ */
+ pattern_str = strtok_r(data, ",", &tmp_str);
+ while (pattern_str) {
+ dup_str = gf_strdup(pattern_str);
+ if (!dup_str)
+ goto out;
+ pattern_list = GF_CALLOC(1, sizeof(gf_defrag_pattern_list_t), 1);
+ if (!pattern_list) {
+ goto out;
+ }
+ pattern = strtok_r(dup_str, ":", &tmp_str1);
+ num = strtok_r(NULL, ":", &tmp_str1);
+ if (!pattern)
+ goto out;
+ if (!num) {
+ if (gf_string2bytesize_uint64(pattern, &pattern_list->size) == 0) {
+ pattern = "*";
+ }
+ } else if (gf_string2bytesize_uint64(num, &pattern_list->size) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INVALID_OPTION,
+ "Invalid option. Defrag pattern:"
+ " Invalid number format \"%s\"",
+ num);
+ goto out;
}
+ memcpy(pattern_list->path_pattern, pattern, strlen(dup_str));
- dht_init_regex (this, options, "rsync-hash-regex",
- &conf->rsync_regex, &conf->rsync_regex_valid);
- dht_init_regex (this, options, "extra-hash-regex",
- &conf->extra_regex, &conf->extra_regex_valid);
+ if (!defrag->defrag_pattern)
+ temp_list = NULL;
+ else
+ temp_list = defrag->defrag_pattern;
- GF_OPTION_RECONF ("weighted-rebalance", conf->do_weighting, options,
- bool, out);
+ pattern_list->next = temp_list;
- ret = 0;
+ defrag->defrag_pattern = pattern_list;
+ pattern_list = NULL;
+
+ GF_FREE(dup_str);
+ dup_str = NULL;
+
+ pattern_str = strtok_r(NULL, ",", &tmp_str);
+ }
+
+ ret = 0;
out:
- return ret;
+ if (ret)
+ GF_FREE(pattern_list);
+ GF_FREE(dup_str);
+
+ return ret;
}
static int
-gf_defrag_pattern_list_fill (xlator_t *this, gf_defrag_info_t *defrag, char *data)
+dht_init_methods(xlator_t *this)
{
- int ret = -1;
- char *tmp_str = NULL;
- char *tmp_str1 = NULL;
- char *dup_str = NULL;
- char *num = NULL;
- char *pattern_str = NULL;
- char *pattern = NULL;
- gf_defrag_pattern_list_t *temp_list = NULL;
- gf_defrag_pattern_list_t *pattern_list = NULL;
-
- if (!this || !defrag || !data)
- goto out;
+ int ret = -1;
+ dht_conf_t *conf = NULL;
+ dht_methods_t *methods = NULL;
- /* Get the pattern for pattern list. "pattern:<optional-size>"
- * eg: *avi, *pdf:10MB, *:1TB
- */
- pattern_str = strtok_r (data, ",", &tmp_str);
- while (pattern_str) {
- dup_str = gf_strdup (pattern_str);
- pattern_list = GF_CALLOC (1, sizeof (gf_defrag_pattern_list_t),
- 1);
- if (!pattern_list) {
- goto out;
- }
- pattern = strtok_r (dup_str, ":", &tmp_str1);
- num = strtok_r (NULL, ":", &tmp_str1);
- if (!pattern)
- goto out;
- if (!num) {
- if (gf_string2bytesize_uint64(pattern, &pattern_list->size)
- == 0) {
- pattern = "*";
- }
- } else if (gf_string2bytesize_uint64 (num, &pattern_list->size) != 0) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_INVALID_OPTION,
- "Invalid option. Defrag pattern:"
- " Invalid number format \"%s\"", num);
- goto out;
- }
- memcpy (pattern_list->path_pattern, pattern, strlen (dup_str));
-
- if (!defrag->defrag_pattern)
- temp_list = NULL;
- else
- temp_list = defrag->defrag_pattern;
-
- pattern_list->next = temp_list;
-
- defrag->defrag_pattern = pattern_list;
- pattern_list = NULL;
-
- GF_FREE (dup_str);
- dup_str = NULL;
-
- pattern_str = strtok_r (NULL, ",", &tmp_str);
- }
+ GF_VALIDATE_OR_GOTO("dht", this, err);
- ret = 0;
-out:
- if (ret)
- GF_FREE (pattern_list);
- GF_FREE (dup_str);
+ conf = this->private;
+ methods = &(conf->methods);
- return ret;
+ methods->migration_get_dst_subvol = dht_migration_get_dst_subvol;
+ methods->migration_other = NULL;
+ methods->layout_search = dht_layout_search;
+
+ ret = 0;
+err:
+ return ret;
}
int
-dht_init (xlator_t *this)
+dht_init(xlator_t *this)
{
- dht_conf_t *conf = NULL;
- char *temp_str = NULL;
- int ret = -1;
- int i = 0;
- gf_defrag_info_t *defrag = NULL;
- int cmd = 0;
- char *node_uuid = NULL;
-
-
- GF_VALIDATE_OR_GOTO ("dht", this, err);
-
- if (!this->children) {
- gf_msg (this->name, GF_LOG_CRITICAL, 0,
- DHT_MSG_INVALID_CONFIGURATION,
- "Distribute needs more than one subvolume");
- return -1;
- }
-
- if (!this->parents) {
- gf_msg (this->name, GF_LOG_WARNING, 0,
- DHT_MSG_INVALID_CONFIGURATION,
- "dangling volume. check volfile");
- }
+ dht_conf_t *conf = NULL;
+ char *temp_str = NULL;
+ int ret = -1;
+ int i = 0;
+ gf_defrag_info_t *defrag = NULL;
+ int cmd = 0;
+ char *node_uuid = NULL;
+ uint32_t commit_hash = 0;
+
+ GF_VALIDATE_OR_GOTO("dht", this, err);
+
+ if (!this->children) {
+ gf_msg(this->name, GF_LOG_CRITICAL, 0, DHT_MSG_INVALID_CONFIGURATION,
+ "Distribute needs more than one subvolume");
+ return -1;
+ }
- conf = GF_CALLOC (1, sizeof (*conf), gf_dht_mt_dht_conf_t);
- if (!conf) {
- goto err;
- }
+ if (!this->parents) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_INVALID_CONFIGURATION,
+ "dangling volume. check volfile");
+ }
- ret = dict_get_int32 (this->options, "rebalance-cmd", &cmd);
+ conf = GF_CALLOC(1, sizeof(*conf), gf_dht_mt_dht_conf_t);
+ if (!conf) {
+ goto err;
+ }
- if (cmd) {
- defrag = GF_CALLOC (1, sizeof (gf_defrag_info_t),
- gf_defrag_info_mt);
+ LOCK_INIT(&conf->subvolume_lock);
+ LOCK_INIT(&conf->layout_lock);
+ LOCK_INIT(&conf->lock);
+ synclock_init(&conf->link_lock, SYNC_LOCK_DEFAULT);
- GF_VALIDATE_OR_GOTO (this->name, defrag, err);
+ /* We get the commit-hash to set only for rebalance process */
+ if (dict_get_uint32(this->options, "commit-hash", &commit_hash) == 0) {
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_COMMIT_HASH_INFO,
+ "%s using commit hash %u", __func__, commit_hash);
+ conf->vol_commit_hash = commit_hash;
+ conf->vch_forced = _gf_true;
+ }
- LOCK_INIT (&defrag->lock);
+ ret = dict_get_int32(this->options, "rebalance-cmd", &cmd);
- defrag->is_exiting = 0;
+ if (cmd) {
+ defrag = GF_CALLOC(1, sizeof(gf_defrag_info_t), gf_defrag_info_mt);
- conf->defrag = defrag;
+ GF_VALIDATE_OR_GOTO(this->name, defrag, err);
- ret = dict_get_str (this->options, "node-uuid", &node_uuid);
- if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_INVALID_CONFIGURATION,
- "Invalid volume configuration: "
- "node-uuid not specified");
- goto err;
- }
+ LOCK_INIT(&defrag->lock);
- if (gf_uuid_parse (node_uuid, defrag->node_uuid)) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_INVALID_OPTION, "Invalid option:"
- " Cannot parse glusterd node uuid");
- goto err;
- }
+ defrag->is_exiting = 0;
- defrag->cmd = cmd;
+ conf->defrag = defrag;
+ defrag->this = this;
- defrag->stats = _gf_false;
+ ret = dict_get_str(this->options, "node-uuid", &node_uuid);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INVALID_CONFIGURATION,
+ "Invalid volume configuration: "
+ "node-uuid not specified");
+ goto err;
}
- conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_ON;
- if (dict_get_str (this->options, "lookup-unhashed", &temp_str) == 0) {
- /* If option is not "auto", other options _should_ be boolean */
- if (strcasecmp (temp_str, "auto")) {
- ret = gf_string2boolean (temp_str,
- &conf->search_unhashed);
- if (ret == -1)
- goto err;
- }
- else
- conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_AUTO;
+ if (gf_uuid_parse(node_uuid, defrag->node_uuid)) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INVALID_OPTION,
+ "Invalid option:"
+ " Cannot parse glusterd node uuid");
+ goto err;
}
- GF_OPTION_INIT ("unhashed-sticky-bit", conf->unhashed_sticky_bit, bool,
- err);
+ defrag->cmd = cmd;
- GF_OPTION_INIT ("use-readdirp", conf->use_readdirp, bool, err);
+ defrag->stats = _gf_false;
- GF_OPTION_INIT ("min-free-disk", conf->min_free_disk, percent_or_size,
- err);
+ defrag->queue = NULL;
- GF_OPTION_INIT ("min-free-inodes", conf->min_free_inodes, percent,
- err);
+ defrag->crawl_done = 0;
- conf->dir_spread_cnt = conf->subvolume_cnt;
- GF_OPTION_INIT ("directory-layout-spread", conf->dir_spread_cnt,
- uint32, err);
+ defrag->global_error = 0;
- GF_OPTION_INIT ("assert-no-child-down", conf->assert_no_child_down,
- bool, err);
+ defrag->q_entry_count = 0;
- GF_OPTION_INIT ("readdir-optimize", conf->readdir_optimize, bool, err);
+ defrag->wakeup_crawler = 0;
- if (defrag) {
- GF_OPTION_INIT ("rebalance-stats", defrag->stats, bool, err);
- if (dict_get_str (this->options, "rebalance-filter", &temp_str)
- == 0) {
- if (gf_defrag_pattern_list_fill (this, defrag, temp_str)
- == -1) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_INVALID_OPTION,
- "Invalid option:"
- " Cannot parse rebalance-filter (%s)",
- temp_str);
+ pthread_mutex_init(&defrag->dfq_mutex, 0);
+ pthread_cond_init(&defrag->parallel_migration_cond, 0);
+ pthread_cond_init(&defrag->rebalance_crawler_alarm, 0);
+ pthread_cond_init(&defrag->df_wakeup_thread, 0);
- goto err;
- }
- }
- }
+ pthread_mutex_init(&defrag->fc_mutex, 0);
+ pthread_cond_init(&defrag->fc_wakeup_cond, 0);
- /* option can be any one of percent or bytes */
- conf->disk_unit = 0;
- if (conf->min_free_disk < 100)
- conf->disk_unit = 'p';
+ defrag->global_error = 0;
+ }
- ret = dht_init_subvolumes (this, conf);
- if (ret == -1) {
+ conf->use_fallocate = 1;
+
+ conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_ON;
+ if (dict_get_str(this->options, "lookup-unhashed", &temp_str) == 0) {
+ /* If option is not "auto", other options _should_ be boolean */
+ if (strcasecmp(temp_str, "auto")) {
+ gf_boolean_t search_unhashed_bool;
+ ret = gf_string2boolean(temp_str, &search_unhashed_bool);
+ if (ret == -1) {
goto err;
+ }
+ conf->search_unhashed = search_unhashed_bool
+ ? GF_DHT_LOOKUP_UNHASHED_ON
+ : GF_DHT_LOOKUP_UNHASHED_OFF;
+ } else {
+ conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_AUTO;
}
+ }
- if (dict_get_str (this->options, "decommissioned-bricks", &temp_str) == 0) {
- ret = dht_parse_decommissioned_bricks (this, conf, temp_str);
- if (ret == -1)
- goto err;
- }
+ GF_OPTION_INIT("lookup-optimize", conf->lookup_optimize, bool, err);
- dht_init_regex (this, this->options, "rsync-hash-regex",
- &conf->rsync_regex, &conf->rsync_regex_valid);
- dht_init_regex (this, this->options, "extra-hash-regex",
- &conf->extra_regex, &conf->extra_regex_valid);
+ GF_OPTION_INIT("unhashed-sticky-bit", conf->unhashed_sticky_bit, bool, err);
- ret = dht_layouts_init (this, conf);
- if (ret == -1) {
- goto err;
- }
+ GF_OPTION_INIT("use-readdirp", conf->use_readdirp, bool, err);
- LOCK_INIT (&conf->subvolume_lock);
- LOCK_INIT (&conf->layout_lock);
+ GF_OPTION_INIT("min-free-disk", conf->min_free_disk, percent_or_size, err);
- conf->gen = 1;
+ GF_OPTION_INIT("min-free-inodes", conf->min_free_inodes, percent, err);
- this->local_pool = mem_pool_new (dht_local_t, 512);
- if (!this->local_pool) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_INIT_FAILED,
- " DHT initialisation failed. "
- "failed to create local_t's memory pool");
- goto err;
- }
+ conf->dir_spread_cnt = conf->subvolume_cnt;
+ GF_OPTION_INIT("directory-layout-spread", conf->dir_spread_cnt, uint32,
+ err);
- GF_OPTION_INIT ("randomize-hash-range-by-gfid",
- conf->randomize_by_gfid, bool, err);
+ GF_OPTION_INIT("assert-no-child-down", conf->assert_no_child_down, bool,
+ err);
- GF_OPTION_INIT ("xattr-name", conf->xattr_name, str, err);
- gf_asprintf (&conf->link_xattr_name, "%s."DHT_LINKFILE_STR,
- conf->xattr_name);
- gf_asprintf (&conf->wild_xattr_name, "%s*", conf->xattr_name);
- if (!conf->link_xattr_name || !conf->wild_xattr_name) {
- goto err;
- }
+ GF_OPTION_INIT("readdir-optimize", conf->readdir_optimize, bool, err);
+
+ GF_OPTION_INIT("lock-migration", conf->lock_migration_enabled, bool, err);
- GF_OPTION_INIT ("weighted-rebalance", conf->do_weighting, bool, err);
+ GF_OPTION_INIT("force-migration", conf->force_migration, bool, err);
- conf->lock_pool = mem_pool_new (dht_lock_t, 512);
- if (!conf->lock_pool) {
- gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_INIT_FAILED,
- "failed to create lock mem_pool, failing "
- "initialization");
+ if (defrag) {
+ defrag->lock_migration_enabled = conf->lock_migration_enabled;
+
+ GF_OPTION_INIT("rebalance-stats", defrag->stats, bool, err);
+ if (dict_get_str(this->options, "rebalance-filter", &temp_str) == 0) {
+ if (gf_defrag_pattern_list_fill(this, defrag, temp_str) == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INVALID_OPTION,
+ "Invalid option:"
+ " Cannot parse rebalance-filter (%s)",
+ temp_str);
+
+ goto err;
+ }
+ }
+ }
+
+ /* option can be any one of percent or bytes */
+ conf->disk_unit = 0;
+ if (conf->min_free_disk < 100)
+ conf->disk_unit = 'p';
+
+ ret = dht_init_subvolumes(this, conf);
+ if (ret == -1) {
+ goto err;
+ }
+
+ if (cmd) {
+ ret = dht_init_local_subvolumes(this, conf);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_INIT_LOCAL_SUBVOL_FAILED,
+ "dht_init_local_subvolumes failed");
+ goto err;
+ }
+ }
+
+ if (dict_get_str(this->options, "decommissioned-bricks", &temp_str) == 0) {
+ ret = dht_parse_decommissioned_bricks(this, conf, temp_str);
+ if (ret == -1)
+ goto err;
+ }
+
+ dht_init_regex(this, this->options, "rsync-hash-regex", &conf->rsync_regex,
+ &conf->rsync_regex_valid, conf);
+ dht_init_regex(this, this->options, "extra-hash-regex", &conf->extra_regex,
+ &conf->extra_regex_valid, conf);
+
+ ret = dht_layouts_init(this, conf);
+ if (ret == -1) {
+ goto err;
+ }
+
+ conf->gen = 1;
+
+ this->local_pool = mem_pool_new(dht_local_t, 512);
+ if (!this->local_pool) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
+ " DHT initialisation failed. "
+ "failed to create local_t's memory pool");
+ goto err;
+ }
+
+ GF_OPTION_INIT("randomize-hash-range-by-gfid", conf->randomize_by_gfid,
+ bool, err);
+
+ if (defrag) {
+ GF_OPTION_INIT("rebal-throttle", temp_str, str, err);
+ if (temp_str) {
+ ret = dht_configure_throttle(this, conf, temp_str);
+ if (ret == -1)
goto err;
}
+ }
- this->private = conf;
+ GF_OPTION_INIT("xattr-name", conf->xattr_name, str, err);
+ gf_asprintf(&conf->mds_xattr_key, "%s." DHT_MDS_STR, conf->xattr_name);
+ gf_asprintf(&conf->link_xattr_name, "%s." DHT_LINKFILE_STR,
+ conf->xattr_name);
+ gf_asprintf(&conf->commithash_xattr_name, "%s." DHT_COMMITHASH_STR,
+ conf->xattr_name);
+ gf_asprintf(&conf->wild_xattr_name, "%s*", conf->xattr_name);
+ if (!conf->link_xattr_name || !conf->wild_xattr_name) {
+ goto err;
+ }
- if (dht_set_subvol_range(this))
- goto err;
+ GF_OPTION_INIT("weighted-rebalance", conf->do_weighting, bool, err);
- conf->methods = &dht_methods;
+ conf->lock_pool = mem_pool_new(dht_lock_t, 512);
+ if (!conf->lock_pool) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INIT_FAILED,
+ "failed to create lock mem_pool, failing "
+ "initialization");
+ goto err;
+ }
- return 0;
+ this->private = conf;
+
+ if (dht_set_subvol_range(this))
+ goto err;
+
+ if (dht_init_methods(this))
+ goto err;
+
+ return 0;
err:
- if (conf) {
- if (conf->file_layouts) {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- GF_FREE (conf->file_layouts[i]);
- }
- GF_FREE (conf->file_layouts);
- }
+ if (conf) {
+ if (conf->file_layouts) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ GF_FREE(conf->file_layouts[i]);
+ }
+ GF_FREE(conf->file_layouts);
+ }
- GF_FREE (conf->subvolumes);
+ GF_FREE(conf->subvolumes);
- GF_FREE (conf->subvolume_status);
+ GF_FREE(conf->subvolume_status);
- GF_FREE (conf->du_stats);
+ GF_FREE(conf->du_stats);
- GF_FREE (conf->defrag);
+ GF_FREE(conf->defrag);
- GF_FREE (conf->xattr_name);
- GF_FREE (conf->link_xattr_name);
- GF_FREE (conf->wild_xattr_name);
+ GF_FREE(conf->xattr_name);
+ GF_FREE(conf->link_xattr_name);
+ GF_FREE(conf->wild_xattr_name);
+ GF_FREE(conf->mds_xattr_key);
- if (conf->lock_pool)
- mem_pool_destroy (conf->lock_pool);
+ if (conf->lock_pool)
+ mem_pool_destroy(conf->lock_pool);
- GF_FREE (conf);
- }
+ GF_FREE(conf);
+ }
- return -1;
+ return -1;
}
-
-struct volume_options options[] = {
- { .key = {"lookup-unhashed"},
- .value = {"auto", "yes", "no", "enable", "disable", "1", "0",
- "on", "off"},
- .type = GF_OPTION_TYPE_STR,
- .default_value = "on",
- .description = "This option if set to ON, does a lookup through "
- "all the sub-volumes, in case a lookup didn't return any result "
- "from the hash subvolume. If set to OFF, it does not do a lookup "
- "on the remaining subvolumes."
- },
- { .key = {"min-free-disk"},
- .type = GF_OPTION_TYPE_PERCENT_OR_SIZET,
- .default_value = "10%",
- .description = "Percentage/Size of disk space, after which the "
- "process starts balancing out the cluster, and logs will appear "
- "in log files",
- },
- { .key = {"min-free-inodes"},
- .type = GF_OPTION_TYPE_PERCENT,
- .default_value = "5%",
- .description = "after system has only N% of inodes, warnings "
- "starts to appear in log files",
- },
- { .key = {"unhashed-sticky-bit"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
- },
- { .key = {"use-readdirp"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "on",
- .description = "This option if set to ON, forces the use of "
- "readdirp, and hence also displays the stats of the files."
- },
- { .key = {"assert-no-child-down"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
- .description = "This option if set to ON, in the event of "
- "CHILD_DOWN, will call exit."
- },
- { .key = {"directory-layout-spread"},
- .type = GF_OPTION_TYPE_INT,
- .min = 1,
- .validate = GF_OPT_VALIDATE_MIN,
- .description = "Specifies the directory layout spread. Takes number "
- "of subvolumes as default value."
- },
- { .key = {"decommissioned-bricks"},
- .type = GF_OPTION_TYPE_ANY,
- .description = "This option if set to ON, decommissions "
- "the brick, so that no new data is allowed to be created "
- "on that brick."
- },
- { .key = {"rebalance-cmd"},
- .type = GF_OPTION_TYPE_INT,
- },
- { .key = {"node-uuid"},
- .type = GF_OPTION_TYPE_STR,
- },
- { .key = {"rebalance-stats"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
- .description = "This option if set to ON displays and logs the "
- " time taken for migration of each file, during the rebalance "
- "process. If set to OFF, the rebalance logs will only display the "
- "time spent in each directory."
- },
- { .key = {"readdir-optimize"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
- .description = "This option if set to ON enables the optimization "
- "that allows DHT to requests non-first subvolumes to filter out "
- "directory entries."
- },
- { .key = {"rsync-hash-regex"},
- .type = GF_OPTION_TYPE_STR,
- /* Setting a default here doesn't work. See dht_init_regex. */
- .description = "Regular expression for stripping temporary-file "
- "suffix and prefix used by rsync, to prevent relocation when the "
- "file is renamed."
- },
- { .key = {"extra-hash-regex"},
- .type = GF_OPTION_TYPE_STR,
- /* Setting a default here doesn't work. See dht_init_regex. */
- .description = "Regular expression for stripping temporary-file "
- "suffix and prefix used by an application, to prevent relocation when "
- "the file is renamed."
- },
- { .key = {"rebalance-filter"},
- .type = GF_OPTION_TYPE_STR,
- },
-
- { .key = {"xattr-name"},
- .type = GF_OPTION_TYPE_STR,
- .default_value = "trusted.glusterfs.dht",
- .description = "Base for extended attributes used by this "
- "translator instance, to avoid conflicts with others above or "
- "below it."
- },
-
- { .key = {"weighted-rebalance"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "on",
- .description = "When enabled, files will be allocated to bricks "
- "with a probability proportional to their size. Otherwise, all "
- "bricks will have the same probability (legacy behavior)."
- },
-
- /* NUFA option */
- { .key = {"local-volume-name"},
- .type = GF_OPTION_TYPE_XLATOR
- },
-
- /* tier options */
- { .key = {"tier-promote-frequency"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "120",
- .description = "Frequency to promote files to fast tier"
- },
-
- { .key = {"tier-demote-frequency"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "120",
- .description = "Frequency to demote files to slow tier"
- },
-
- { .key = {"write-freq-threshold"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "0",
- .description = "Defines the write fequency "
- "that would be considered hot"
- },
-
- { .key = {"read-freq-threshold"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "0",
- .description = "Defines the read fequency "
- "that would be considered hot"
- },
-
- /* switch option */
- { .key = {"pattern.switch.case"},
- .type = GF_OPTION_TYPE_ANY
- },
-
- { .key = {"randomize-hash-range-by-gfid"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
- .description = "Use gfid of directory to determine the subvolume "
- "from which hash ranges are allocated starting with 0. "
- "Note that we still use a directory/file's name to determine the "
- "subvolume to which it hashes"
- },
-
- { .key = {NULL} },
+struct volume_options dht_options[] = {
+ {
+ .key = {"lookup-unhashed"},
+ .value = {"auto", "yes", "no", "enable", "disable", "1", "0", "on",
+ "off"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "on",
+ .description =
+ "This option if set to ON, does a lookup through "
+ "all the sub-volumes, in case a lookup didn't return any result "
+ "from the hash subvolume. If set to OFF, it does not do a lookup "
+ "on the remaining subvolumes.",
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE,
+ .level = OPT_STATUS_BASIC,
+ },
+ {.key = {"lookup-optimize"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description =
+ "This option if set to ON enables the optimization "
+ "of -ve lookups, by not doing a lookup on non-hashed subvolumes for "
+ "files, in case the hashed subvolume does not return any result. "
+ "This option disregards the lookup-unhashed setting, when enabled.",
+ .op_version = {GD_OP_VERSION_3_7_2},
+ .level = OPT_STATUS_ADVANCED,
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+ {.key = {"min-free-disk"},
+ .type = GF_OPTION_TYPE_PERCENT_OR_SIZET,
+ .default_value = "10%",
+ .description =
+ "Percentage/Size of disk space, after which the "
+ "process starts balancing out the cluster, and logs will appear "
+ "in log files",
+ .op_version = {1},
+ .level = OPT_STATUS_BASIC,
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+ {.key = {"min-free-inodes"},
+ .type = GF_OPTION_TYPE_PERCENT,
+ .default_value = "5%",
+ .description = "after system has only N% of inodes, warnings "
+ "starts to appear in log files",
+ .op_version = {1},
+ .level = OPT_STATUS_BASIC,
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+ {
+ .key = {"unhashed-sticky-bit"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ },
+ {.key = {"use-readdirp"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "This option if set to ON, forces the use of "
+ "readdirp, and hence also displays the stats of the files.",
+ .level = OPT_STATUS_ADVANCED,
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+ {.key = {"assert-no-child-down"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "This option if set to ON, in the event of "
+ "CHILD_DOWN, will call exit."},
+ {
+ .key = {"directory-layout-spread"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .validate = GF_OPT_VALIDATE_MIN,
+ .description = "Specifies the directory layout spread. Takes number "
+ "of subvolumes as default value.",
+
+ .op_version = {2},
+ },
+ {
+ .key = {"decommissioned-bricks"},
+ .type = GF_OPTION_TYPE_ANY,
+ .description =
+ "This option if set to ON, decommissions "
+ "the brick, so that no new data is allowed to be created "
+ "on that brick.",
+ .level = OPT_STATUS_ADVANCED,
+ },
+ {
+ .key = {"rebalance-cmd"},
+ .type = GF_OPTION_TYPE_INT,
+ },
+ {
+ .key = {"commit-hash"},
+ .type = GF_OPTION_TYPE_INT,
+ },
+ {
+ .key = {"node-uuid"},
+ .type = GF_OPTION_TYPE_STR,
+ },
+ {
+ .key = {"rebalance-stats"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description =
+ "This option if set to ON displays and logs the "
+ " time taken for migration of each file, during the rebalance "
+ "process. If set to OFF, the rebalance logs will only display the "
+ "time spent in each directory.",
+ .op_version = {2},
+ .level = OPT_STATUS_BASIC,
+ },
+ {.key = {"readdir-optimize"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description =
+ "This option if set to ON enables the optimization "
+ "that allows DHT to requests non-first subvolumes to filter out "
+ "directory entries.",
+ .op_version = {1},
+ .level = OPT_STATUS_ADVANCED,
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+ {.key = {"rsync-hash-regex"},
+ .type = GF_OPTION_TYPE_STR,
+ /* Setting a default here doesn't work. See dht_init_regex. */
+ .description =
+ "Regular expression for stripping temporary-file "
+ "suffix and prefix used by rsync, to prevent relocation when the "
+ "file is renamed.",
+ .op_version = {3},
+ .level = OPT_STATUS_BASIC,
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+ {.key = {"extra-hash-regex"},
+ .type = GF_OPTION_TYPE_STR,
+ /* Setting a default here doesn't work. See dht_init_regex. */
+ .description =
+ "Regular expression for stripping temporary-file "
+ "suffix and prefix used by an application, to prevent relocation when "
+ "the file is renamed.",
+ .op_version = {3},
+ .level = OPT_STATUS_BASIC,
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+ {
+ .key = {"rebalance-filter"},
+ .type = GF_OPTION_TYPE_STR,
+ },
+
+ {
+ .key = {"xattr-name"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "trusted.glusterfs.dht",
+ .description =
+ "Base for extended attributes used by this "
+ "translator instance, to avoid conflicts with others above or "
+ "below it.",
+ .op_version = {3},
+ },
+
+ {.key = {"weighted-rebalance"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description =
+ "When enabled, files will be allocated to bricks "
+ "with a probability proportional to their size. Otherwise, all "
+ "bricks will have the same probability (legacy behavior).",
+ .op_version = {GD_OP_VERSION_3_6_0},
+ .level = OPT_STATUS_BASIC,
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+
+ /* NUFA option */
+ {.key = {"local-volume-name"}, .type = GF_OPTION_TYPE_XLATOR},
+
+ /* switch option */
+ {.key = {"pattern.switch.case"}, .type = GF_OPTION_TYPE_ANY},
+
+ {
+ .key = {"randomize-hash-range-by-gfid"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description =
+ "Use gfid of directory to determine the subvolume "
+ "from which hash ranges are allocated starting with 0. "
+ "Note that we still use a directory/file's name to determine the "
+ "subvolume to which it hashes",
+ .op_version = {GD_OP_VERSION_3_6_0},
+ },
+
+ {.key = {"rebal-throttle"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "normal",
+ .description = " Sets the maximum number of parallel file migrations "
+ "allowed on a node during the rebalance operation. The"
+ " default value is normal and allows a max of "
+ "[($(processing units) - 4) / 2), 2] files to be "
+ "migrated at a time. Lazy will allow only one file to "
+ "be migrated at a time and aggressive will allow "
+ "max of [($(processing units) - 4) / 2), 4]",
+ .op_version = {GD_OP_VERSION_3_7_0},
+ .level = OPT_STATUS_BASIC,
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC
+
+ },
+
+ {.key = {"lock-migration"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = " If enabled this feature will migrate the posix locks"
+ " associated with a file during rebalance",
+ .op_version = {GD_OP_VERSION_3_8_0},
+ .level = OPT_STATUS_ADVANCED,
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+
+ {.key = {"force-migration"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "If disabled, rebalance will not migrate files that "
+ "are being written to by an application",
+ .op_version = {GD_OP_VERSION_4_0_0},
+ .level = OPT_STATUS_ADVANCED,
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+
+ {.key = {NULL}},
};
+
+#define NUM_DHT_OPTIONS (sizeof(dht_options) / sizeof(dht_options[0]))
+
+extern struct volume_options options[NUM_DHT_OPTIONS]
+ __attribute__((alias("dht_options")));
diff --git a/xlators/cluster/dht/src/dht.c b/xlators/cluster/dht/src/dht.c
index 3934df5ec64..53de8292704 100644
--- a/xlators/cluster/dht/src/dht.c
+++ b/xlators/cluster/dht/src/dht.c
@@ -8,87 +8,116 @@
cases as published by the Free Software Foundation.
*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "statedump.h"
#include "dht-common.h"
-dht_methods_t dht_methods = {
- .migration_get_dst_subvol = dht_migration_get_dst_subvol,
- .migration_needed = dht_migration_needed,
- .layout_search = dht_layout_search,
-};
+struct xlator_fops dht_pt_fops = {
+ /* we need to keep mkdir to make sure we
+ have layout on new directory */
+ .mkdir = dht_pt_mkdir,
+ .getxattr = dht_pt_getxattr,
+ .fgetxattr = dht_pt_fgetxattr,
-class_methods_t class_methods = {
- .init = dht_init,
- .fini = dht_fini,
- .reconfigure = dht_reconfigure,
- .notify = dht_notify
+ /* required to trace fop properly in changelog */
+ .rename = dht_pt_rename,
+
+ /* FIXME: commenting the '.lookup()' below made some of
+ the failing tests to pass. I would remove the below
+ line, but keeping it here as a reminder for people
+ to check for issues if they find concerns with DHT
+ pass-through logic */
+ /*
+ .lookup = dht_lookup,
+ .readdir = dht_readdir,
+ .readdirp = dht_readdirp,
+ */
+ /* Keeping above as commented, mainly to support the
+ usecase of a gluster volume getting to 1x(anytype),
+ due to remove-brick (shrinking) exercise. In that case,
+ we would need above fops to be available, so we can
+ handle the case of dangling linkto files (if any) */
};
struct xlator_fops fops = {
- .lookup = dht_lookup,
- .mknod = dht_mknod,
- .create = dht_create,
+ .ipc = dht_ipc,
+ .lookup = dht_lookup,
+ .mknod = dht_mknod,
+ .create = dht_create,
- .open = dht_open,
- .statfs = dht_statfs,
- .opendir = dht_opendir,
- .readdir = dht_readdir,
- .readdirp = dht_readdirp,
- .fsyncdir = dht_fsyncdir,
- .symlink = dht_symlink,
- .unlink = dht_unlink,
- .link = dht_link,
- .mkdir = dht_mkdir,
- .rmdir = dht_rmdir,
- .rename = dht_rename,
- .entrylk = dht_entrylk,
- .fentrylk = dht_fentrylk,
+ .open = dht_open,
+ .statfs = dht_statfs,
+ .opendir = dht_opendir,
+ .readdir = dht_readdir,
+ .readdirp = dht_readdirp,
+ .fsyncdir = dht_fsyncdir,
+ .symlink = dht_symlink,
+ .unlink = dht_unlink,
+ .link = dht_link,
+ .mkdir = dht_mkdir,
+ .rmdir = dht_rmdir,
+ .rename = dht_rename,
+ .entrylk = dht_entrylk,
+ .fentrylk = dht_fentrylk,
- /* Inode read operations */
- .stat = dht_stat,
- .fstat = dht_fstat,
- .access = dht_access,
- .readlink = dht_readlink,
- .getxattr = dht_getxattr,
- .fgetxattr = dht_fgetxattr,
- .readv = dht_readv,
- .flush = dht_flush,
- .fsync = dht_fsync,
- .inodelk = dht_inodelk,
- .finodelk = dht_finodelk,
- .lk = dht_lk,
+ /* Inode read operations */
+ .stat = dht_stat,
+ .fstat = dht_fstat,
+ .access = dht_access,
+ .readlink = dht_readlink,
+ .getxattr = dht_getxattr,
+ .fgetxattr = dht_fgetxattr,
+ .readv = dht_readv,
+ .flush = dht_flush,
+ .fsync = dht_fsync,
+ .inodelk = dht_inodelk,
+ .finodelk = dht_finodelk,
+ .lk = dht_lk,
+ .lease = dht_lease,
- /* Inode write operations */
- .fremovexattr = dht_fremovexattr,
- .removexattr = dht_removexattr,
- .setxattr = dht_setxattr,
- .fsetxattr = dht_fsetxattr,
- .truncate = dht_truncate,
- .ftruncate = dht_ftruncate,
- .writev = dht_writev,
- .xattrop = dht_xattrop,
- .fxattrop = dht_fxattrop,
- .setattr = dht_setattr,
- .fsetattr = dht_fsetattr,
- .fallocate = dht_fallocate,
- .discard = dht_discard,
- .zerofill = dht_zerofill,
+ /* Inode write operations */
+ .fremovexattr = dht_fremovexattr,
+ .removexattr = dht_removexattr,
+ .setxattr = dht_setxattr,
+ .fsetxattr = dht_fsetxattr,
+ .truncate = dht_truncate,
+ .ftruncate = dht_ftruncate,
+ .writev = dht_writev,
+ .xattrop = dht_xattrop,
+ .fxattrop = dht_fxattrop,
+ .setattr = dht_setattr,
+ .fsetattr = dht_fsetattr,
+ .fallocate = dht_fallocate,
+ .discard = dht_discard,
+ .zerofill = dht_zerofill,
};
struct xlator_dumpops dumpops = {
- .priv = dht_priv_dump,
- .inodectx = dht_inodectx_dump,
+ .priv = dht_priv_dump,
+ .inodectx = dht_inodectx_dump,
};
-
struct xlator_cbks cbks = {
-// .release = dht_release,
-// .releasedir = dht_releasedir,
- .forget = dht_forget
+ .release = dht_release,
+ // .releasedir = dht_releasedir,
+ .forget = dht_forget,
+};
+
+extern int32_t
+mem_acct_init(xlator_t *this);
+
+extern struct volume_options dht_options[];
+
+xlator_api_t xlator_api = {
+ .init = dht_init,
+ .fini = dht_fini,
+ .notify = dht_notify,
+ .reconfigure = dht_reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .dumpops = &dumpops,
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = dht_options,
+ .identifier = "distribute",
+ .pass_through_fops = &dht_pt_fops,
+ .category = GF_MAINTAINED,
};
diff --git a/xlators/cluster/dht/src/nufa.c b/xlators/cluster/dht/src/nufa.c
index 72d6d9c10e5..3648a564840 100644
--- a/xlators/cluster/dht/src/nufa.c
+++ b/xlators/cluster/dht/src/nufa.c
@@ -8,674 +8,650 @@
cases as published by the Free Software Foundation.
*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "dht-common.h"
/* TODO: all 'TODO's in dht.c holds good */
-extern struct volume_options options[];
+extern struct volume_options dht_options[];
int
-nufa_local_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- inode_t *inode, struct iatt *stbuf, dict_t *xattr,
- struct iatt *postparent)
+nufa_local_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent)
{
- xlator_t *subvol = NULL;
- char is_linkfile = 0;
- char is_dir = 0;
- dht_conf_t *conf = NULL;
- dht_local_t *local = NULL;
- loc_t *loc = NULL;
- int i = 0;
- call_frame_t *prev = NULL;
- int call_cnt = 0;
- int ret = 0;
-
- conf = this->private;
-
- prev = cookie;
- local = frame->local;
- loc = &local->loc;
-
- if (ENTRY_MISSING (op_ret, op_errno)) {
- if (conf->search_unhashed) {
- local->op_errno = ENOENT;
- dht_lookup_everywhere (frame, this, loc);
- return 0;
- }
+ xlator_t *subvol = NULL;
+ char is_linkfile = 0;
+ char is_dir = 0;
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ loc_t *loc = NULL;
+ int i = 0;
+ xlator_t *prev = NULL;
+ int call_cnt = 0;
+ int ret = 0;
+
+ conf = this->private;
+
+ prev = cookie;
+ local = frame->local;
+ loc = &local->loc;
+
+ if (ENTRY_MISSING(op_ret, op_errno)) {
+ if (conf->search_unhashed) {
+ local->op_errno = ENOENT;
+ dht_lookup_everywhere(frame, this, loc);
+ return 0;
}
-
- if (op_ret == -1)
- goto out;
-
- is_linkfile = check_is_linkfile (inode, stbuf, xattr,
- conf->link_xattr_name);
- is_dir = check_is_dir (inode, stbuf, xattr);
-
- if (!is_dir && !is_linkfile) {
- /* non-directory and not a linkfile */
- ret = dht_layout_preset (this, prev->this, inode);
- if (ret < 0) {
- gf_msg_debug (this->name, 0,
- "could not set pre-set layout for subvol"
- " %s", prev->this->name);
- op_ret = -1;
- op_errno = EINVAL;
- goto err;
- }
-
- goto out;
+ }
+
+ if (op_ret == -1)
+ goto out;
+
+ is_linkfile = check_is_linkfile(inode, stbuf, xattr, conf->link_xattr_name);
+ is_dir = check_is_dir(inode, stbuf, xattr);
+
+ if (!is_dir && !is_linkfile) {
+ /* non-directory and not a linkfile */
+ ret = dht_layout_preset(this, prev, inode);
+ if (ret < 0) {
+ gf_msg_debug(this->name, 0,
+ "could not set pre-set layout for subvol"
+ " %s",
+ prev->name);
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto err;
}
- if (is_dir) {
- call_cnt = conf->subvolume_cnt;
- local->call_cnt = call_cnt;
-
- local->inode = inode_ref (inode);
- local->xattr = dict_ref (xattr);
-
- local->op_ret = 0;
- local->op_errno = 0;
-
- local->layout = dht_layout_new (this, conf->subvolume_cnt);
- if (!local->layout) {
- op_ret = -1;
- op_errno = ENOMEM;
- goto err;
- }
-
- for (i = 0; i < call_cnt; i++) {
- STACK_WIND (frame, dht_lookup_dir_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->lookup,
- &local->loc, local->xattr_req);
- }
- }
+ goto out;
+ }
+
+ if (is_dir) {
+ call_cnt = conf->subvolume_cnt;
+ local->call_cnt = call_cnt;
- if (is_linkfile) {
- subvol = dht_linkfile_subvol (this, inode, stbuf, xattr);
+ local->inode = inode_ref(inode);
+ local->xattr = dict_ref(xattr);
- if (!subvol) {
- gf_msg_debug (this->name, 0,
- "linkfile has no link subvolume. path=%s",
- loc->path);
- dht_lookup_everywhere (frame, this, loc);
- return 0;
- }
+ local->op_ret = 0;
+ local->op_errno = 0;
- STACK_WIND (frame, dht_lookup_linkfile_cbk,
- subvol, subvol->fops->lookup,
- &local->loc, local->xattr_req);
+ local->layout = dht_layout_new(this, conf->subvolume_cnt);
+ if (!local->layout) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto err;
}
- return 0;
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND_COOKIE(frame, dht_lookup_dir_cbk, conf->subvolumes[i],
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup, &local->loc,
+ local->xattr_req);
+ }
+ }
-out:
- if (!local->hashed_subvol) {
- gf_msg_debug (this->name, 0,
- "no subvolume in layout for path=%s",
- local->loc.path);
- local->op_errno = ENOENT;
- dht_lookup_everywhere (frame, this, loc);
- return 0;
+ if (is_linkfile) {
+ subvol = dht_linkfile_subvol(this, inode, stbuf, xattr);
+
+ if (!subvol) {
+ gf_msg_debug(this->name, 0,
+ "linkfile has no link subvolume. path=%s", loc->path);
+ dht_lookup_everywhere(frame, this, loc);
+ return 0;
}
- STACK_WIND (frame, dht_lookup_cbk,
- local->hashed_subvol, local->hashed_subvol->fops->lookup,
- &local->loc, local->xattr_req);
+ STACK_WIND_COOKIE(frame, dht_lookup_linkfile_cbk, subvol, subvol,
+ subvol->fops->lookup, &local->loc, local->xattr_req);
+ }
+
+ return 0;
+out:
+ if (!local->hashed_subvol) {
+ gf_msg_debug(this->name, 0, "no subvolume in layout for path=%s",
+ local->loc.path);
+ local->op_errno = ENOENT;
+ dht_lookup_everywhere(frame, this, loc);
return 0;
+ }
+
+ STACK_WIND_COOKIE(frame, dht_lookup_cbk, local->hashed_subvol,
+ local->hashed_subvol, local->hashed_subvol->fops->lookup,
+ &local->loc, local->xattr_req);
+
+ return 0;
err:
- DHT_STACK_UNWIND (lookup, frame, op_ret, op_errno,
- inode, stbuf, xattr, postparent);
- return 0;
+ DHT_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, stbuf, xattr,
+ postparent);
+ return 0;
}
int
-nufa_lookup (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *xattr_req)
+nufa_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
{
- xlator_t *hashed_subvol = NULL;
- xlator_t *subvol = NULL;
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- int ret = -1;
- int op_errno = -1;
- dht_layout_t *layout = NULL;
- int i = 0;
- int call_cnt = 0;
-
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
- conf = this->private;
-
- local = dht_local_init (frame, loc, NULL, GF_FOP_LOOKUP);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
+ xlator_t *hashed_subvol = NULL;
+ xlator_t *subvol = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
+ int op_errno = -1;
+ dht_layout_t *layout = NULL;
+ int i = 0;
+ int call_cnt = 0;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+ VALIDATE_OR_GOTO(loc->inode, err);
+ VALIDATE_OR_GOTO(loc->path, err);
+
+ conf = this->private;
+
+ local = dht_local_init(frame, loc, NULL, GF_FOP_LOOKUP);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ if (xattr_req) {
+ local->xattr_req = dict_ref(xattr_req);
+ } else {
+ local->xattr_req = dict_new();
+ }
+
+ hashed_subvol = dht_subvol_get_hashed(this, &local->loc);
+
+ local->hashed_subvol = hashed_subvol;
+
+ if (is_revalidate(loc)) {
+ layout = local->layout;
+ if (!layout) {
+ gf_msg_debug(this->name, 0,
+ "revalidate lookup without cache. "
+ "path=%s",
+ loc->path);
+ op_errno = EINVAL;
+ goto err;
}
- if (xattr_req) {
- local->xattr_req = dict_ref (xattr_req);
- } else {
- local->xattr_req = dict_new ();
+ if (layout->gen && (layout->gen < conf->gen)) {
+ gf_msg_debug(this->name, 0, "incomplete layout failure for path=%s",
+ loc->path);
+ dht_layout_unref(this, local->layout);
+ goto do_fresh_lookup;
}
- hashed_subvol = dht_subvol_get_hashed (this, &local->loc);
-
- local->hashed_subvol = hashed_subvol;
-
- if (is_revalidate (loc)) {
- layout = local->layout;
- if (!layout) {
- gf_msg_debug (this->name, 0,
- "revalidate lookup without cache. "
- "path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
-
- if (layout->gen && (layout->gen < conf->gen)) {
- gf_msg_debug (this->name, 0,
- "incomplete layout failure for path=%s",
- loc->path);
- dht_layout_unref (this, local->layout);
- goto do_fresh_lookup;
- }
-
- local->inode = inode_ref (loc->inode);
-
- local->call_cnt = layout->cnt;
- call_cnt = local->call_cnt;
-
- /* NOTE: we don't require 'trusted.glusterfs.dht.linkto' attribute,
- * revalidates directly go to the cached-subvolume.
- */
- ret = dict_set_uint32 (local->xattr_req,
- conf->xattr_name, 4 * 4);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "Failed to set dict value.");
- op_errno = -1;
- goto err;
- }
-
- for (i = 0; i < layout->cnt; i++) {
- subvol = layout->list[i].xlator;
-
- STACK_WIND (frame, dht_revalidate_cbk,
- subvol, subvol->fops->lookup,
- loc, local->xattr_req);
-
- if (!--call_cnt)
- break;
- }
- } else {
- do_fresh_lookup:
- ret = dict_set_uint32 (local->xattr_req,
- conf->xattr_name, 4 * 4);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "Failed to set dict value.");
- op_errno = -1;
- goto err;
- }
-
- ret = dict_set_uint32 (local->xattr_req,
- conf->link_xattr_name, 256);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "Failed to set dict value.");
- op_errno = -1;
- goto err;
- }
-
- /* Send it to only local volume */
- STACK_WIND (frame, nufa_local_lookup_cbk,
- (xlator_t *)conf->private,
- ((xlator_t *)conf->private)->fops->lookup,
- loc, local->xattr_req);
- }
+ local->inode = inode_ref(loc->inode);
- return 0;
+ local->call_cnt = layout->cnt;
+ call_cnt = local->call_cnt;
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL,
- NULL);
- return 0;
-}
+ /* NOTE: we don't require 'trusted.glusterfs.dht.linkto' attribute,
+ * revalidates directly go to the cached-subvolume.
+ */
+ ret = dict_set_uint32(local->xattr_req, conf->xattr_name, 4 * 4);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dict value.");
+ op_errno = -1;
+ goto err;
+ }
-int
-nufa_create_linkfile_create_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int op_ret, int op_errno,
- inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- dht_local_t *local = NULL;
+ for (i = 0; i < layout->cnt; i++) {
+ subvol = layout->list[i].xlator;
+
+ STACK_WIND_COOKIE(frame, dht_revalidate_cbk, subvol, subvol,
+ subvol->fops->lookup, loc, local->xattr_req);
- local = frame->local;
+ if (!--call_cnt)
+ break;
+ }
+ } else {
+ do_fresh_lookup:
+ ret = dict_set_uint32(local->xattr_req, conf->xattr_name, 4 * 4);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dict value.");
+ op_errno = -1;
+ goto err;
+ }
- if (op_ret == -1)
- goto err;
+ ret = dict_set_uint32(local->xattr_req, conf->link_xattr_name, 256);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dict value.");
+ op_errno = -1;
+ goto err;
+ }
- STACK_WIND (frame, dht_create_cbk,
- local->cached_subvol, local->cached_subvol->fops->create,
- &local->loc, local->flags, local->mode, local->umask,
- local->fd, local->params);
+ /* Send it to only local volume */
+ STACK_WIND_COOKIE(
+ frame, nufa_local_lookup_cbk, ((xlator_t *)conf->private),
+ ((xlator_t *)conf->private),
+ ((xlator_t *)conf->private)->fops->lookup, loc, local->xattr_req);
+ }
- return 0;
+ return 0;
err:
- DHT_STACK_UNWIND (create, frame, -1, op_errno,
- NULL, NULL, NULL, NULL, NULL, NULL);
- return 0;
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ return 0;
}
int
-nufa_create (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, mode_t mode,
- mode_t umask, fd_t *fd, dict_t *params)
+nufa_create_linkfile_create_cbk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- xlator_t *subvol = NULL;
- xlator_t *avail_subvol = NULL;
- int op_errno = -1;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
+ dht_local_t *local = NULL;
- conf = this->private;
+ local = frame->local;
- dht_get_du_info (frame, this, loc);
+ if (op_ret == -1)
+ goto err;
- local = dht_local_init (frame, loc, fd, GF_FOP_CREATE);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ STACK_WIND_COOKIE(frame, dht_create_cbk, local->cached_subvol,
+ local->cached_subvol, local->cached_subvol->fops->create,
+ &local->loc, local->flags, local->mode, local->umask,
+ local->fd, local->params);
- subvol = dht_subvol_get_hashed (this, loc);
- if (!subvol) {
- gf_msg_debug (this->name, 0,
- "no subvolume in layout for path=%s",
- loc->path);
- op_errno = ENOENT;
- goto err;
- }
+ return 0;
- avail_subvol = conf->private;
- if (dht_is_subvol_filled (this, (xlator_t *)conf->private)) {
- avail_subvol =
- dht_free_disk_available_subvol (this,
- (xlator_t *)conf->private,
- local);
- }
+err:
+ DHT_STACK_UNWIND(create, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
+}
- if (subvol != avail_subvol) {
- /* create a link file instead of actual file */
- local->params = dict_ref (params);
- local->mode = mode;
- local->flags = flags;
- local->umask = umask;
- local->cached_subvol = avail_subvol;
- dht_linkfile_create (frame, nufa_create_linkfile_create_cbk,
- this, avail_subvol, subvol, loc);
- return 0;
- }
+int
+nufa_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *params)
+{
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ xlator_t *subvol = NULL;
+ xlator_t *avail_subvol = NULL;
+ int op_errno = -1;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+
+ conf = this->private;
+
+ dht_get_du_info(frame, this, loc);
+
+ local = dht_local_init(frame, loc, fd, GF_FOP_CREATE);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = dht_subvol_get_hashed(this, loc);
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no subvolume in layout for path=%s",
+ loc->path);
+ op_errno = ENOENT;
+ goto err;
+ }
+
+ avail_subvol = conf->private;
+ if (dht_is_subvol_filled(this, (xlator_t *)conf->private)) {
+ avail_subvol = dht_free_disk_available_subvol(
+ this, (xlator_t *)conf->private, local);
+ }
+
+ if (subvol != avail_subvol) {
+ /* create a link file instead of actual file */
+ local->params = dict_ref(params);
+ local->mode = mode;
+ local->flags = flags;
+ local->umask = umask;
+ local->cached_subvol = avail_subvol;
+ dht_linkfile_create(frame, nufa_create_linkfile_create_cbk, this,
+ avail_subvol, subvol, loc);
+ return 0;
+ }
- gf_msg_trace (this->name, 0,
- "creating %s on %s", loc->path, subvol->name);
+ gf_msg_trace(this->name, 0, "creating %s on %s", loc->path, subvol->name);
- STACK_WIND (frame, dht_create_cbk,
- subvol, subvol->fops->create,
- loc, flags, mode, umask, fd, params);
+ STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol,
+ subvol->fops->create, loc, flags, mode, umask, fd,
+ params);
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (create, frame, -1, op_errno,
- NULL, NULL, NULL, NULL, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(create, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL,
+ NULL);
- return 0;
+ return 0;
}
int
-nufa_mknod_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, inode_t *inode,
- struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+nufa_mknod_linkfile_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
- local = frame->local;
- if (!local || !local->cached_subvol) {
- op_errno = EINVAL;
- op_ret = -1;
- goto err;
- }
+ local = frame->local;
+ if (!local || !local->cached_subvol) {
+ op_errno = EINVAL;
+ op_ret = -1;
+ goto err;
+ }
- if (op_ret >= 0) {
- STACK_WIND_COOKIE (frame, dht_newfile_cbk,
- (void *)local->cached_subvol, local->cached_subvol,
- local->cached_subvol->fops->mknod,
- &local->loc, local->mode, local->rdev,
- local->umask, local->params);
+ if (op_ret >= 0) {
+ STACK_WIND_COOKIE(
+ frame, dht_newfile_cbk, (void *)local->cached_subvol,
+ local->cached_subvol, local->cached_subvol->fops->mknod,
+ &local->loc, local->mode, local->rdev, local->umask, local->params);
- return 0;
- }
+ return 0;
+ }
err:
- WIPE (postparent);
- WIPE (preparent);
+ WIPE(postparent);
+ WIPE(preparent);
- DHT_STACK_UNWIND (link, frame, op_ret, op_errno,
- inode, stbuf, preparent, postparent, xdata);
- return 0;
+ DHT_STACK_UNWIND(link, frame, op_ret, op_errno, inode, stbuf, preparent,
+ postparent, xdata);
+ return 0;
}
-
int
-nufa_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t rdev, mode_t umask, dict_t *params)
+nufa_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *params)
{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- xlator_t *subvol = NULL;
- xlator_t *avail_subvol = NULL;
- int op_errno = -1;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
-
- conf = this->private;
-
- dht_get_du_info (frame, this, loc);
-
- local = dht_local_init (frame, loc, NULL, GF_FOP_MKNOD);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- subvol = dht_subvol_get_hashed (this, loc);
- if (!subvol) {
- gf_msg_debug (this->name, 0,
- "no subvolume in layout for path=%s",
- loc->path);
- op_errno = ENOENT;
- goto err;
- }
-
- /* Consider the disksize in consideration */
- avail_subvol = conf->private;
- if (dht_is_subvol_filled (this, (xlator_t *)conf->private)) {
- avail_subvol =
- dht_free_disk_available_subvol (this,
- (xlator_t *)conf->private,
- local);
- }
-
- if (avail_subvol != subvol) {
- /* Create linkfile first */
-
- local->params = dict_ref (params);
- local->mode = mode;
- local->umask = umask;
- local->rdev = rdev;
- local->cached_subvol = avail_subvol;
-
- dht_linkfile_create (frame, nufa_mknod_linkfile_cbk, this,
- avail_subvol, subvol, loc);
- return 0;
- }
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ xlator_t *subvol = NULL;
+ xlator_t *avail_subvol = NULL;
+ int op_errno = -1;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+
+ conf = this->private;
+
+ dht_get_du_info(frame, this, loc);
+
+ local = dht_local_init(frame, loc, NULL, GF_FOP_MKNOD);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = dht_subvol_get_hashed(this, loc);
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no subvolume in layout for path=%s",
+ loc->path);
+ op_errno = ENOENT;
+ goto err;
+ }
+
+ /* Consider the disksize in consideration */
+ avail_subvol = conf->private;
+ if (dht_is_subvol_filled(this, (xlator_t *)conf->private)) {
+ avail_subvol = dht_free_disk_available_subvol(
+ this, (xlator_t *)conf->private, local);
+ }
+
+ if (avail_subvol != subvol) {
+ /* Create linkfile first */
+
+ local->params = dict_ref(params);
+ local->mode = mode;
+ local->umask = umask;
+ local->rdev = rdev;
+ local->cached_subvol = avail_subvol;
+
+ dht_linkfile_create(frame, nufa_mknod_linkfile_cbk, this, avail_subvol,
+ subvol, loc);
+ return 0;
+ }
- gf_msg_trace (this->name, 0,
- "creating %s on %s", loc->path, subvol->name);
+ gf_msg_trace(this->name, 0, "creating %s on %s", loc->path, subvol->name);
- STACK_WIND_COOKIE (frame, dht_newfile_cbk, (void *)subvol, subvol,
- subvol->fops->mknod, loc, mode, rdev, umask,
- params);
+ STACK_WIND_COOKIE(frame, dht_newfile_cbk, (void *)subvol, subvol,
+ subvol->fops->mknod, loc, mode, rdev, umask, params);
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (mknod, frame, -1, op_errno,
- NULL, NULL, NULL, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
- return 0;
+ return 0;
}
-
gf_boolean_t
-same_first_part (char *str1, char term1, char *str2, char term2)
+same_first_part(char *str1, char term1, char *str2, char term2)
{
- gf_boolean_t ended1;
- gf_boolean_t ended2;
-
- for (;;) {
- ended1 = ((*str1 == '\0') || (*str1 == term1));
- ended2 = ((*str2 == '\0') || (*str2 == term2));
- if (ended1 && ended2) {
- return _gf_true;
- }
- if (ended1 || ended2 || (*str1 != *str2)) {
- return _gf_false;
- }
- ++str1;
- ++str2;
+ gf_boolean_t ended1;
+ gf_boolean_t ended2;
+
+ for (;;) {
+ ended1 = ((*str1 == '\0') || (*str1 == term1));
+ ended2 = ((*str2 == '\0') || (*str2 == term2));
+ if (ended1 && ended2) {
+ return _gf_true;
}
+ if (ended1 || ended2 || (*str1 != *str2)) {
+ return _gf_false;
+ }
+ ++str1;
+ ++str2;
+ }
}
typedef struct nufa_args {
- xlator_t *this;
- char *volname;
- gf_boolean_t addr_match;
+ xlator_t *this;
+ char *volname;
+ gf_boolean_t addr_match;
} nufa_args_t;
static void
-nufa_find_local_brick (xlator_t *xl, void *data)
+nufa_find_local_brick(xlator_t *xl, void *data)
{
- nufa_args_t *args = data;
- xlator_t *this = args->this;
- char *local_volname = args->volname;
- gf_boolean_t addr_match = args->addr_match;
- char *brick_host = NULL;
- dht_conf_t *conf = this->private;
- int ret = -1;
-
- /*This means a local subvol was already found. We pick the first brick
- * that is local*/
- if (conf->private)
- return;
-
- if (strcmp (xl->name, local_volname) == 0) {
- conf->private = xl;
- gf_log (this->name, GF_LOG_INFO, "Using specified subvol %s",
- local_volname);
- return;
- }
-
- if (!addr_match)
- return;
-
- ret = dict_get_str (xl->options, "remote-host", &brick_host);
- if ((ret == 0) &&
- (gf_is_same_address (local_volname, brick_host) ||
- gf_is_local_addr (brick_host))) {
- conf->private = xl;
- gf_log (this->name, GF_LOG_INFO, "Using the first local "
- "subvol %s", xl->name);
- return;
- }
-
+ nufa_args_t *args = data;
+ xlator_t *this = args->this;
+ char *local_volname = args->volname;
+ gf_boolean_t addr_match = args->addr_match;
+ char *brick_host = NULL;
+ dht_conf_t *conf = this->private;
+ int ret = -1;
+
+ /*This means a local subvol was already found. We pick the first brick
+ * that is local*/
+ if (conf->private)
+ return;
+
+ if (strcmp(xl->name, local_volname) == 0) {
+ conf->private = xl;
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_INFO,
+ "Using specified subvol %s", local_volname);
+ return;
+ }
+
+ if (!addr_match)
+ return;
+
+ ret = dict_get_str(xl->options, "remote-host", &brick_host);
+ if ((ret == 0) && (gf_is_same_address(local_volname, brick_host) ||
+ gf_is_local_addr(brick_host))) {
+ conf->private = xl;
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_INFO,
+ "Using the first local "
+ "subvol %s",
+ xl->name);
+ return;
+ }
}
static void
-nufa_to_dht (xlator_t *this)
+nufa_to_dht(xlator_t *this)
{
- GF_ASSERT (this);
- GF_ASSERT (this->fops);
+ GF_ASSERT(this);
+ GF_ASSERT(this->fops);
- this->fops->lookup = dht_lookup;
- this->fops->create = dht_create;
- this->fops->mknod = dht_mknod;
+ this->fops->lookup = dht_lookup;
+ this->fops->create = dht_create;
+ this->fops->mknod = dht_mknod;
}
int
-nufa_find_local_subvol (xlator_t *this,
- void (*fn) (xlator_t *each, void* data), void *data)
+nufa_find_local_subvol(xlator_t *this, void (*fn)(xlator_t *each, void *data),
+ void *data)
{
- int ret = -1;
- dht_conf_t *conf = this->private;
- xlator_list_t *trav = NULL;
- xlator_t *parent = NULL;
- xlator_t *candidate = NULL;
-
- xlator_foreach_depth_first (this, fn, data);
- if (!conf->private) {
- gf_log (this->name, GF_LOG_ERROR, "Couldn't find a local "
- "brick");
- return -1;
+ int ret = -1;
+ dht_conf_t *conf = this->private;
+ xlator_list_t *trav = NULL;
+ xlator_t *parent = NULL;
+ xlator_t *candidate = NULL;
+
+ xlator_foreach_depth_first(this, fn, data);
+ if (!conf->private) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_BRICK_ERROR,
+ "Couldn't find a local "
+ "brick");
+ return -1;
+ }
+
+ candidate = conf->private;
+ trav = candidate->parents;
+ while (trav) {
+ parent = trav->xlator;
+ if (strcmp(parent->type, "cluster/nufa") == 0) {
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_INFO,
+ "Found local subvol, "
+ "%s",
+ candidate->name);
+ ret = 0;
+ conf->private = candidate;
+ break;
}
- candidate = conf->private;
- trav = candidate->parents;
- while (trav) {
-
- parent = trav->xlator;
- if (strcmp (parent->type, "cluster/nufa") == 0) {
- gf_log (this->name, GF_LOG_INFO, "Found local subvol, "
- "%s", candidate->name);
- ret = 0;
- conf->private = candidate;
- break;
- }
-
- candidate = parent;
- trav = parent->parents;
- }
+ candidate = parent;
+ trav = parent->parents;
+ }
- return ret;
+ return ret;
}
int
-nufa_init (xlator_t *this)
+nufa_init(xlator_t *this)
{
- data_t *data = NULL;
- char *local_volname = NULL;
- int ret = -1;
- char my_hostname[256];
- gf_boolean_t addr_match = _gf_false;
- nufa_args_t args = {0, };
-
- ret = dht_init(this);
- if (ret) {
- return ret;
- }
-
- if ((data = dict_get (this->options, "local-volume-name"))) {
- local_volname = data->data;
-
- } else {
- addr_match = _gf_true;
- local_volname = "localhost";
- ret = gethostname (my_hostname, 256);
- if (ret == 0)
- local_volname = my_hostname;
-
- else
- gf_log (this->name, GF_LOG_WARNING,
- "could not find hostname (%s)",
- strerror (errno));
-
- }
-
- args.this = this;
- args.volname = local_volname;
- args.addr_match = addr_match;
- ret = nufa_find_local_subvol (this, nufa_find_local_brick, &args);
- if (ret) {
- gf_log (this->name, GF_LOG_INFO,
- "Unable to find local subvolume, switching "
- "to dht mode");
- nufa_to_dht (this);
- }
- return 0;
+ data_t *data = NULL;
+ char *local_volname = NULL;
+ int ret = -1;
+ char my_hostname[256];
+ gf_boolean_t addr_match = _gf_false;
+ nufa_args_t args = {
+ 0,
+ };
+
+ ret = dht_init(this);
+ if (ret) {
+ return ret;
+ }
+
+ if ((data = dict_get(this->options, "local-volume-name"))) {
+ local_volname = data->data;
+
+ } else {
+ addr_match = _gf_true;
+ local_volname = "localhost";
+ ret = gethostname(my_hostname, 256);
+ if (ret == 0)
+ local_volname = my_hostname;
+
+ else
+ gf_msg(this->name, GF_LOG_WARNING, errno,
+ DHT_MSG_GET_HOSTNAME_FAILED, "could not find hostname");
+ }
+
+ args.this = this;
+ args.volname = local_volname;
+ args.addr_match = addr_match;
+ ret = nufa_find_local_subvol(this, nufa_find_local_brick, &args);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_INFO,
+ "Unable to find local subvolume, switching "
+ "to dht mode");
+ nufa_to_dht(this);
+ }
+ return 0;
}
dht_methods_t dht_methods = {
- .migration_get_dst_subvol = dht_migration_get_dst_subvol,
- .migration_needed = dht_migration_needed,
- .layout_search = dht_layout_search,
-};
-
-class_methods_t class_methods = {
- .init = nufa_init,
- .fini = dht_fini,
- .reconfigure = dht_reconfigure,
- .notify = dht_notify
+ .migration_get_dst_subvol = dht_migration_get_dst_subvol,
+ .layout_search = dht_layout_search,
};
-
struct xlator_fops fops = {
- .lookup = nufa_lookup,
- .create = nufa_create,
- .mknod = nufa_mknod,
-
- .stat = dht_stat,
- .fstat = dht_fstat,
- .truncate = dht_truncate,
- .ftruncate = dht_ftruncate,
- .access = dht_access,
- .readlink = dht_readlink,
- .setxattr = dht_setxattr,
- .getxattr = dht_getxattr,
- .removexattr = dht_removexattr,
- .open = dht_open,
- .readv = dht_readv,
- .writev = dht_writev,
- .flush = dht_flush,
- .fsync = dht_fsync,
- .statfs = dht_statfs,
- .lk = dht_lk,
- .opendir = dht_opendir,
- .readdir = dht_readdir,
- .readdirp = dht_readdirp,
- .fsyncdir = dht_fsyncdir,
- .symlink = dht_symlink,
- .unlink = dht_unlink,
- .link = dht_link,
- .mkdir = dht_mkdir,
- .rmdir = dht_rmdir,
- .rename = dht_rename,
- .inodelk = dht_inodelk,
- .finodelk = dht_finodelk,
- .entrylk = dht_entrylk,
- .fentrylk = dht_fentrylk,
- .xattrop = dht_xattrop,
- .fxattrop = dht_fxattrop,
- .setattr = dht_setattr,
+ .lookup = nufa_lookup,
+ .create = nufa_create,
+ .mknod = nufa_mknod,
+
+ .stat = dht_stat,
+ .fstat = dht_fstat,
+ .truncate = dht_truncate,
+ .ftruncate = dht_ftruncate,
+ .access = dht_access,
+ .readlink = dht_readlink,
+ .setxattr = dht_setxattr,
+ .getxattr = dht_getxattr,
+ .removexattr = dht_removexattr,
+ .open = dht_open,
+ .readv = dht_readv,
+ .writev = dht_writev,
+ .flush = dht_flush,
+ .fsync = dht_fsync,
+ .statfs = dht_statfs,
+ .lk = dht_lk,
+ .opendir = dht_opendir,
+ .readdir = dht_readdir,
+ .readdirp = dht_readdirp,
+ .fsyncdir = dht_fsyncdir,
+ .symlink = dht_symlink,
+ .unlink = dht_unlink,
+ .link = dht_link,
+ .mkdir = dht_mkdir,
+ .rmdir = dht_rmdir,
+ .rename = dht_rename,
+ .inodelk = dht_inodelk,
+ .finodelk = dht_finodelk,
+ .entrylk = dht_entrylk,
+ .fentrylk = dht_fentrylk,
+ .xattrop = dht_xattrop,
+ .fxattrop = dht_fxattrop,
+ .setattr = dht_setattr,
};
-
-struct xlator_cbks cbks = {
- .forget = dht_forget
+struct xlator_cbks cbks = {.forget = dht_forget};
+extern int32_t
+mem_acct_init(xlator_t *this);
+
+xlator_api_t xlator_api = {
+ .init = nufa_init,
+ .fini = dht_fini,
+ .notify = dht_notify,
+ .reconfigure = dht_reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = dht_options,
+ .identifier = "nufa",
+ .category = GF_TECH_PREVIEW,
};
diff --git a/xlators/cluster/dht/src/switch.c b/xlators/cluster/dht/src/switch.c
index 0071dfa265d..207d109a025 100644
--- a/xlators/cluster/dht/src/switch.c
+++ b/xlators/cluster/dht/src/switch.c
@@ -8,12 +8,6 @@
cases as published by the Free Software Foundation.
*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "dht-common.h"
#include "dht-mem-types.h"
@@ -22,885 +16,876 @@
#include <fnmatch.h>
#include <string.h>
-extern struct volume_options options[];
+extern struct volume_options dht_options[];
struct switch_sched_array {
- xlator_t *xl;
- int32_t eligible;
- int32_t considered;
+ xlator_t *xl;
+ int32_t eligible;
+ int32_t considered;
};
/* Select one of this struct based on the path's pattern match */
struct switch_struct {
- struct switch_struct *next;
- struct switch_sched_array *array;
- int32_t node_index; /* Index of the node in
- this pattern. */
- int32_t num_child; /* Total num of child nodes
- with this pattern. */
- char path_pattern[256];
+ struct switch_struct *next;
+ struct switch_sched_array *array;
+ int32_t node_index; /* Index of the node in
+ this pattern. */
+ int32_t num_child; /* Total num of child nodes
+ with this pattern. */
+ char path_pattern[256];
};
/* TODO: all 'TODO's in dht.c holds good */
/* This function should return child node as '*:subvolumes' is inserterd */
static int32_t
-gf_switch_valid_child (xlator_t *this, const char *child)
+gf_switch_valid_child(xlator_t *this, const char *child)
{
- xlator_list_t *children = NULL;
- int32_t ret = 0;
-
- children = this->children;
- while (children) {
- if (!strcmp (child, children->xlator->name)) {
- ret = 1;
- break;
- }
- children = children->next;
+ xlator_list_t *children = NULL;
+ int32_t ret = 0;
+
+ children = this->children;
+ while (children) {
+ if (!strcmp(child, children->xlator->name)) {
+ ret = 1;
+ break;
}
+ children = children->next;
+ }
- return ret;
+ return ret;
}
static xlator_t *
-get_switch_matching_subvol (const char *path, dht_conf_t *conf,
- xlator_t *hashed_subvol)
+get_switch_matching_subvol(const char *path, dht_conf_t *conf,
+ xlator_t *hashed_subvol)
{
- struct switch_struct *cond = NULL;
- struct switch_struct *trav = NULL;
- char *pathname = NULL;
- int idx = 0;
- xlator_t *subvol = NULL;
-
- cond = conf->private;
- subvol = hashed_subvol;
- if (!cond)
- goto out;
-
- pathname = gf_strdup (path);
- if (!pathname)
- goto out;
-
- trav = cond;
- while (trav) {
- if (fnmatch (trav->path_pattern,
- pathname, FNM_NOESCAPE) == 0) {
- for (idx = 0; idx < trav->num_child; idx++) {
- if (trav->array[idx].xl == hashed_subvol)
- goto out;
- }
- idx = trav->node_index++;
- trav->node_index %= trav->num_child;
- subvol = trav->array[idx].xl;
- goto out;
- }
- trav = trav->next;
+ struct switch_struct *cond = NULL;
+ struct switch_struct *trav = NULL;
+ char *pathname = NULL;
+ int idx = 0;
+ xlator_t *subvol = NULL;
+
+ cond = conf->private;
+ subvol = hashed_subvol;
+ if (!cond)
+ goto out;
+
+ pathname = gf_strdup(path);
+ if (!pathname)
+ goto out;
+
+ trav = cond;
+ while (trav) {
+ if (fnmatch(trav->path_pattern, pathname, FNM_NOESCAPE) == 0) {
+ for (idx = 0; idx < trav->num_child; idx++) {
+ if (trav->array[idx].xl == hashed_subvol)
+ goto out;
+ }
+ idx = trav->node_index++;
+ trav->node_index %= trav->num_child;
+ subvol = trav->array[idx].xl;
+ goto out;
}
+ trav = trav->next;
+ }
out:
- GF_FREE (pathname);
+ GF_FREE(pathname);
- return subvol;
+ return subvol;
}
-
int
-switch_local_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- inode_t *inode, struct iatt *stbuf, dict_t *xattr,
- struct iatt *postparent)
+switch_local_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent)
{
- xlator_t *subvol = NULL;
- char is_linkfile = 0;
- char is_dir = 0;
- dht_conf_t *conf = NULL;
- dht_local_t *local = NULL;
- loc_t *loc = NULL;
- int i = 0;
- call_frame_t *prev = NULL;
- int call_cnt = 0;
- int ret = 0;
-
- conf = this->private;
-
- prev = cookie;
- local = frame->local;
- loc = &local->loc;
-
- if (ENTRY_MISSING (op_ret, op_errno)) {
- if (conf->search_unhashed) {
- local->op_errno = ENOENT;
- dht_lookup_everywhere (frame, this, loc);
- return 0;
- }
+ xlator_t *subvol = NULL;
+ char is_linkfile = 0;
+ char is_dir = 0;
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ loc_t *loc = NULL;
+ int i = 0;
+ xlator_t *prev = NULL;
+ int call_cnt = 0;
+ int ret = 0;
+
+ conf = this->private;
+
+ prev = cookie;
+ local = frame->local;
+ loc = &local->loc;
+
+ if (ENTRY_MISSING(op_ret, op_errno)) {
+ if (conf->search_unhashed) {
+ local->op_errno = ENOENT;
+ dht_lookup_everywhere(frame, this, loc);
+ return 0;
}
-
- if (op_ret == -1)
- goto out;
-
- is_linkfile = check_is_linkfile (inode, stbuf, xattr,
- conf->link_xattr_name);
- is_dir = check_is_dir (inode, stbuf, xattr);
-
- if (!is_dir && !is_linkfile) {
- /* non-directory and not a linkfile */
-
- ret = dht_layout_preset (this, prev->this, inode);
- if (ret < 0) {
- gf_msg_debug (this->name, 0,
- "could not set pre-set layout "
- "for subvol %s",
- prev->this->name);
- op_ret = -1;
- op_errno = EINVAL;
- goto err;
- }
-
- goto out;
+ }
+
+ if (op_ret == -1)
+ goto out;
+
+ is_linkfile = check_is_linkfile(inode, stbuf, xattr, conf->link_xattr_name);
+ is_dir = check_is_dir(inode, stbuf, xattr);
+
+ if (!is_dir && !is_linkfile) {
+ /* non-directory and not a linkfile */
+
+ ret = dht_layout_preset(this, prev, inode);
+ if (ret < 0) {
+ gf_msg_debug(this->name, 0,
+ "could not set pre-set layout "
+ "for subvol %s",
+ prev->name);
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto err;
}
- if (is_dir) {
- call_cnt = conf->subvolume_cnt;
- local->call_cnt = call_cnt;
+ goto out;
+ }
- local->inode = inode_ref (inode);
- local->xattr = dict_ref (xattr);
+ if (is_dir) {
+ call_cnt = conf->subvolume_cnt;
+ local->call_cnt = call_cnt;
- local->op_ret = 0;
- local->op_errno = 0;
+ local->inode = inode_ref(inode);
+ local->xattr = dict_ref(xattr);
- local->layout = dht_layout_new (this, conf->subvolume_cnt);
- if (!local->layout) {
- op_ret = -1;
- op_errno = ENOMEM;
- gf_msg_debug (this->name, 0,
- "memory allocation failed :(");
- goto err;
- }
+ local->op_ret = 0;
+ local->op_errno = 0;
- for (i = 0; i < call_cnt; i++) {
- STACK_WIND (frame, dht_lookup_dir_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->lookup,
- &local->loc, local->xattr_req);
- }
+ local->layout = dht_layout_new(this, conf->subvolume_cnt);
+ if (!local->layout) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ gf_msg_debug(this->name, 0, "memory allocation failed :(");
+ goto err;
}
- if (is_linkfile) {
- subvol = dht_linkfile_subvol (this, inode, stbuf, xattr);
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND_COOKIE(frame, dht_lookup_dir_cbk, conf->subvolumes[i],
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup, &local->loc,
+ local->xattr_req);
+ }
+ }
- if (!subvol) {
- gf_msg_debug (this->name, 0,
- "linkfile has no link subvolume.path=%s",
- loc->path);
- dht_lookup_everywhere (frame, this, loc);
- return 0;
- }
+ if (is_linkfile) {
+ subvol = dht_linkfile_subvol(this, inode, stbuf, xattr);
- STACK_WIND (frame, dht_lookup_linkfile_cbk,
- subvol, subvol->fops->lookup,
- &local->loc, local->xattr_req);
+ if (!subvol) {
+ gf_msg_debug(this->name, 0,
+ "linkfile has no link subvolume.path=%s", loc->path);
+ dht_lookup_everywhere(frame, this, loc);
+ return 0;
}
- return 0;
+ STACK_WIND_COOKIE(frame, dht_lookup_linkfile_cbk, subvol, subvol,
+ subvol->fops->lookup, &local->loc, local->xattr_req);
+ }
+
+ return 0;
out:
- if (!local->hashed_subvol) {
- gf_msg_debug (this->name, 0,
- "no subvolume in layout for path=%s",
- local->loc.path);
- local->op_errno = ENOENT;
- dht_lookup_everywhere (frame, this, loc);
- return 0;
- }
+ if (!local->hashed_subvol) {
+ gf_msg_debug(this->name, 0, "no subvolume in layout for path=%s",
+ local->loc.path);
+ local->op_errno = ENOENT;
+ dht_lookup_everywhere(frame, this, loc);
+ return 0;
+ }
- STACK_WIND (frame, dht_lookup_cbk,
- local->hashed_subvol, local->hashed_subvol->fops->lookup,
- &local->loc, local->xattr_req);
+ STACK_WIND_COOKIE(frame, dht_lookup_cbk, local->hashed_subvol,
+ local->hashed_subvol, local->hashed_subvol->fops->lookup,
+ &local->loc, local->xattr_req);
- return 0;
+ return 0;
err:
- DHT_STACK_UNWIND (lookup, frame, op_ret, op_errno,
- inode, stbuf, xattr, NULL);
- return 0;
+ DHT_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, stbuf, xattr,
+ NULL);
+ return 0;
}
int
-switch_lookup (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *xattr_req)
+switch_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xattr_req)
{
- xlator_t *hashed_subvol = NULL;
- xlator_t *cached_subvol = NULL;
- xlator_t *subvol = NULL;
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- int ret = -1;
- int op_errno = -1;
- dht_layout_t *layout = NULL;
- int i = 0;
- int call_cnt = 0;
-
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
- conf = this->private;
-
- local = dht_local_init (frame, loc, NULL, GF_FOP_LOOKUP);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
+ xlator_t *hashed_subvol = NULL;
+ xlator_t *cached_subvol = NULL;
+ xlator_t *subvol = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
+ int op_errno = -1;
+ dht_layout_t *layout = NULL;
+ int i = 0;
+ int call_cnt = 0;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+ VALIDATE_OR_GOTO(loc->inode, err);
+ VALIDATE_OR_GOTO(loc->path, err);
+
+ conf = this->private;
+
+ local = dht_local_init(frame, loc, NULL, GF_FOP_LOOKUP);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ if (xattr_req) {
+ local->xattr_req = dict_ref(xattr_req);
+ } else {
+ local->xattr_req = dict_new();
+ }
+
+ hashed_subvol = dht_subvol_get_hashed(this, &local->loc);
+ cached_subvol = local->cached_subvol;
+
+ local->hashed_subvol = hashed_subvol;
+
+ if (is_revalidate(loc)) {
+ layout = local->layout;
+ if (!layout) {
+ gf_msg_debug(this->name, 0,
+ "revalidate lookup without cache. path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
}
- if (xattr_req) {
- local->xattr_req = dict_ref (xattr_req);
- } else {
- local->xattr_req = dict_new ();
+ if (layout->gen && (layout->gen < conf->gen)) {
+ gf_msg_debug(this->name, 0, "incomplete layout failure for path=%s",
+ loc->path);
+ dht_layout_unref(this, local->layout);
+ goto do_fresh_lookup;
}
- hashed_subvol = dht_subvol_get_hashed (this, &local->loc);
- cached_subvol = local->cached_subvol;
+ local->inode = inode_ref(loc->inode);
- local->hashed_subvol = hashed_subvol;
+ local->call_cnt = layout->cnt;
+ call_cnt = local->call_cnt;
- if (is_revalidate (loc)) {
- layout = local->layout;
- if (!layout) {
- gf_msg_debug(this->name, 0,
- "revalidate lookup without cache. path=%s",
- loc->path);
- op_errno = EINVAL;
- goto err;
- }
-
- if (layout->gen && (layout->gen < conf->gen)) {
- gf_msg_debug (this->name, 0,
- "incomplete layout failure for path=%s",
- loc->path);
- dht_layout_unref (this, local->layout);
- goto do_fresh_lookup;
- }
-
- local->inode = inode_ref (loc->inode);
-
- local->call_cnt = layout->cnt;
- call_cnt = local->call_cnt;
+ /* NOTE: we don't require 'trusted.glusterfs.dht.linkto'
+ * attribute, revalidates directly go to the cached-subvolume.
+ */
+ ret = dict_set_uint32(local->xattr_req, conf->xattr_name, 4 * 4);
+ if (ret < 0)
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "failed to set dict value for %s", conf->xattr_name);
- /* NOTE: we don't require 'trusted.glusterfs.dht.linkto'
- * attribute, revalidates directly go to the cached-subvolume.
- */
- ret = dict_set_uint32 (local->xattr_req,
- conf->xattr_name, 4 * 4);
- if (ret < 0)
- gf_log (this->name, GF_LOG_WARNING,
- "failed to set dict value for %s",
- conf->xattr_name);
+ for (i = 0; i < layout->cnt; i++) {
+ subvol = layout->list[i].xlator;
- for (i = 0; i < layout->cnt; i++) {
- subvol = layout->list[i].xlator;
+ STACK_WIND_COOKIE(frame, dht_revalidate_cbk, subvol, subvol,
+ subvol->fops->lookup, loc, local->xattr_req);
- STACK_WIND (frame, dht_revalidate_cbk,
- subvol, subvol->fops->lookup,
- loc, local->xattr_req);
+ if (!--call_cnt)
+ break;
+ }
+ } else {
+ do_fresh_lookup:
+ ret = dict_set_uint32(local->xattr_req, conf->xattr_name, 4 * 4);
+ if (ret < 0)
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "failed to set dict value for %s", conf->xattr_name);
+
+ ret = dict_set_uint32(local->xattr_req, conf->link_xattr_name, 256);
+ if (ret < 0)
+ gf_msg(this->name, GF_LOG_WARNING, EINVAL, DHT_MSG_DICT_SET_FAILED,
+ "failed to set dict value for %s", conf->link_xattr_name);
+
+ if (!hashed_subvol) {
+ gf_msg_debug(this->name, 0,
+ "no subvolume in layout for path=%s, "
+ "checking on all the subvols to see if "
+ "it is a directory",
+ loc->path);
+ call_cnt = conf->subvolume_cnt;
+ local->call_cnt = call_cnt;
+
+ local->layout = dht_layout_new(this, conf->subvolume_cnt);
+ if (!local->layout) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND_COOKIE(frame, dht_lookup_dir_cbk,
+ conf->subvolumes[i], conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup,
+ &local->loc, local->xattr_req);
+ }
+ return 0;
+ }
- if (!--call_cnt)
- break;
- }
+ /* */
+ cached_subvol = get_switch_matching_subvol(loc->path, conf,
+ hashed_subvol);
+ if (cached_subvol == hashed_subvol) {
+ STACK_WIND_COOKIE(frame, dht_lookup_cbk, hashed_subvol,
+ hashed_subvol, hashed_subvol->fops->lookup, loc,
+ local->xattr_req);
} else {
- do_fresh_lookup:
- ret = dict_set_uint32 (local->xattr_req,
- conf->xattr_name, 4 * 4);
- if (ret < 0)
- gf_log (this->name, GF_LOG_WARNING,
- "failed to set dict value for %s",
- conf->xattr_name);
-
- ret = dict_set_uint32 (local->xattr_req,
- conf->link_xattr_name, 256);
- if (ret < 0)
- gf_log (this->name, GF_LOG_WARNING,
- "failed to set dict value for %s",
- conf->link_xattr_name);
-
- if (!hashed_subvol) {
- gf_msg_debug (this->name, 0,
- "no subvolume in layout for path=%s, "
- "checking on all the subvols to see if "
- "it is a directory", loc->path);
- call_cnt = conf->subvolume_cnt;
- local->call_cnt = call_cnt;
-
- local->layout = dht_layout_new (this,
- conf->subvolume_cnt);
- if (!local->layout) {
- op_errno = ENOMEM;
- goto err;
- }
-
- for (i = 0; i < call_cnt; i++) {
- STACK_WIND (frame, dht_lookup_dir_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->lookup,
- &local->loc, local->xattr_req);
- }
- return 0;
- }
-
- /* */
- cached_subvol = get_switch_matching_subvol (loc->path, conf,
- hashed_subvol);
- if (cached_subvol == hashed_subvol) {
- STACK_WIND (frame, dht_lookup_cbk,
- hashed_subvol,
- hashed_subvol->fops->lookup,
- loc, local->xattr_req);
- } else {
- STACK_WIND (frame, switch_local_lookup_cbk,
- cached_subvol,
- cached_subvol->fops->lookup,
- loc, local->xattr_req);
- }
+ STACK_WIND_COOKIE(frame, switch_local_lookup_cbk, cached_subvol,
+ cached_subvol, cached_subvol->fops->lookup, loc,
+ local->xattr_req);
}
+ }
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (lookup, frame, -1, op_errno,
- NULL, NULL, NULL, NULL);
- return 0;
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ return 0;
}
int
-switch_create_linkfile_create_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int op_ret, int op_errno,
- inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+switch_create_linkfile_create_cbk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
- local = frame->local;
+ local = frame->local;
- if (op_ret == -1)
- goto err;
+ if (op_ret == -1)
+ goto err;
- STACK_WIND (frame, dht_create_cbk,
- local->cached_subvol, local->cached_subvol->fops->create,
- &local->loc, local->flags, local->mode, local->umask,
- local->fd, local->params);
+ STACK_WIND_COOKIE(frame, dht_create_cbk, local->cached_subvol,
+ local->cached_subvol, local->cached_subvol->fops->create,
+ &local->loc, local->flags, local->mode, local->umask,
+ local->fd, local->params);
- return 0;
+ return 0;
err:
- DHT_STACK_UNWIND (create, frame, -1, op_errno,
- NULL, NULL, NULL, NULL, NULL, NULL);
- return 0;
+ DHT_STACK_UNWIND(create, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
}
int
-switch_create (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, mode_t mode,
- mode_t umask, fd_t *fd, dict_t *params)
+switch_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *params)
{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- xlator_t *subvol = NULL;
- xlator_t *avail_subvol = NULL;
- int op_errno = -1;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
-
- conf = this->private;
-
- dht_get_du_info (frame, this, loc);
-
- local = dht_local_init (frame, loc, fd, GF_FOP_CREATE);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- subvol = dht_subvol_get_hashed (this, loc);
- if (!subvol) {
- gf_msg_debug (this->name, 0,
- "no subvolume in layout for path=%s",
- loc->path);
- op_errno = ENOENT;
- goto err;
- }
-
- avail_subvol = get_switch_matching_subvol (loc->path, conf, subvol);
- if (dht_is_subvol_filled (this, avail_subvol)) {
- avail_subvol =
- dht_free_disk_available_subvol (this, avail_subvol,
- local);
- }
-
- if (subvol != avail_subvol) {
- /* create a link file instead of actual file */
- local->mode = mode;
- local->flags = flags;
- local->umask = umask;
- local->cached_subvol = avail_subvol;
- dht_linkfile_create (frame, switch_create_linkfile_create_cbk,
- this, avail_subvol, subvol, loc);
- return 0;
- }
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ xlator_t *subvol = NULL;
+ xlator_t *avail_subvol = NULL;
+ int op_errno = -1;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+
+ conf = this->private;
+
+ dht_get_du_info(frame, this, loc);
+
+ local = dht_local_init(frame, loc, fd, GF_FOP_CREATE);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = dht_subvol_get_hashed(this, loc);
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no subvolume in layout for path=%s",
+ loc->path);
+ op_errno = ENOENT;
+ goto err;
+ }
+
+ avail_subvol = get_switch_matching_subvol(loc->path, conf, subvol);
+ if (dht_is_subvol_filled(this, avail_subvol)) {
+ avail_subvol = dht_free_disk_available_subvol(this, avail_subvol,
+ local);
+ }
+
+ if (subvol != avail_subvol) {
+ /* create a link file instead of actual file */
+ local->mode = mode;
+ local->flags = flags;
+ local->umask = umask;
+ local->cached_subvol = avail_subvol;
+ dht_linkfile_create(frame, switch_create_linkfile_create_cbk, this,
+ avail_subvol, subvol, loc);
+ return 0;
+ }
- gf_msg_trace (this->name, 0,
- "creating %s on %s", loc->path, subvol->name);
+ gf_msg_trace(this->name, 0, "creating %s on %s", loc->path, subvol->name);
- STACK_WIND (frame, dht_create_cbk,
- subvol, subvol->fops->create,
- loc, flags, mode, umask, fd, params);
+ STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol,
+ subvol->fops->create, loc, flags, mode, umask, fd,
+ params);
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (create, frame, -1, op_errno,
- NULL, NULL, NULL, NULL, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(create, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL,
+ NULL);
- return 0;
+ return 0;
}
int
-switch_mknod_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, inode_t *inode,
- struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+switch_mknod_linkfile_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
- local = frame->local;
- if (!local || !local->cached_subvol) {
- op_errno = EINVAL;
- op_ret = -1;
- goto err;
- }
+ local = frame->local;
+ if (!local || !local->cached_subvol) {
+ op_errno = EINVAL;
+ op_ret = -1;
+ goto err;
+ }
- if (op_ret >= 0) {
- STACK_WIND_COOKIE (frame, dht_newfile_cbk,
- (void *)local->cached_subvol, local->cached_subvol,
- local->cached_subvol->fops->mknod,
- &local->loc, local->mode, local->rdev,
- local->umask, local->params);
+ if (op_ret >= 0) {
+ STACK_WIND_COOKIE(
+ frame, dht_newfile_cbk, (void *)local->cached_subvol,
+ local->cached_subvol, local->cached_subvol->fops->mknod,
+ &local->loc, local->mode, local->rdev, local->umask, local->params);
- return 0;
- }
-err:
- DHT_STACK_UNWIND (link, frame, op_ret, op_errno,
- inode, stbuf, preparent, postparent, xdata);
return 0;
+ }
+err:
+ DHT_STACK_UNWIND(link, frame, op_ret, op_errno, inode, stbuf, preparent,
+ postparent, xdata);
+ return 0;
}
-
int
-switch_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dev_t rdev, mode_t umask, dict_t *params)
+switch_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *params)
{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- xlator_t *subvol = NULL;
- xlator_t *avail_subvol = NULL;
- int op_errno = -1;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
-
- conf = this->private;
-
- dht_get_du_info (frame, this, loc);
-
- local = dht_local_init (frame, loc, NULL, GF_FOP_MKNOD);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- subvol = dht_subvol_get_hashed (this, loc);
- if (!subvol) {
- gf_msg_debug (this->name, 0,
- "no subvolume in layout for path=%s",
- loc->path);
- op_errno = ENOENT;
- goto err;
- }
-
- /* Consider the disksize in consideration */
- avail_subvol = get_switch_matching_subvol (loc->path, conf, subvol);
- if (dht_is_subvol_filled (this, avail_subvol)) {
- avail_subvol =
- dht_free_disk_available_subvol (this, avail_subvol,
- local);
- }
-
- if (avail_subvol != subvol) {
- /* Create linkfile first */
-
- local->params = dict_ref (params);
- local->mode = mode;
- local->umask = umask;
- local->rdev = rdev;
- local->cached_subvol = avail_subvol;
-
- dht_linkfile_create (frame, switch_mknod_linkfile_cbk,
- this, avail_subvol, subvol, loc);
- return 0;
- }
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ xlator_t *subvol = NULL;
+ xlator_t *avail_subvol = NULL;
+ int op_errno = -1;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+
+ conf = this->private;
+
+ dht_get_du_info(frame, this, loc);
+
+ local = dht_local_init(frame, loc, NULL, GF_FOP_MKNOD);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = dht_subvol_get_hashed(this, loc);
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no subvolume in layout for path=%s",
+ loc->path);
+ op_errno = ENOENT;
+ goto err;
+ }
+
+ /* Consider the disksize in consideration */
+ avail_subvol = get_switch_matching_subvol(loc->path, conf, subvol);
+ if (dht_is_subvol_filled(this, avail_subvol)) {
+ avail_subvol = dht_free_disk_available_subvol(this, avail_subvol,
+ local);
+ }
+
+ if (avail_subvol != subvol) {
+ /* Create linkfile first */
+
+ local->params = dict_ref(params);
+ local->mode = mode;
+ local->umask = umask;
+ local->rdev = rdev;
+ local->cached_subvol = avail_subvol;
+
+ dht_linkfile_create(frame, switch_mknod_linkfile_cbk, this,
+ avail_subvol, subvol, loc);
+ return 0;
+ }
- gf_msg_trace (this->name, 0,
- "creating %s on %s", loc->path, subvol->name);
+ gf_msg_trace(this->name, 0, "creating %s on %s", loc->path, subvol->name);
- STACK_WIND_COOKIE (frame, dht_newfile_cbk, (void *)subvol, subvol,
- subvol->fops->mknod, loc, mode, rdev, umask,
- params);
+ STACK_WIND_COOKIE(frame, dht_newfile_cbk, (void *)subvol, subvol,
+ subvol->fops->mknod, loc, mode, rdev, umask, params);
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (mknod, frame, -1, op_errno,
- NULL, NULL, NULL, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
- return 0;
+ return 0;
}
-
void
-switch_fini (xlator_t *this)
+switch_fini(xlator_t *this)
{
- dht_conf_t *conf = NULL;
- struct switch_struct *trav = NULL;
- struct switch_struct *prev = NULL;
-
- conf = this->private;
-
- if (conf) {
- trav = (struct switch_struct *)conf->private;
- conf->private = NULL;
- while (trav) {
- GF_FREE (trav->array);
- prev = trav;
- trav = trav->next;
- GF_FREE (prev);
- }
+ dht_conf_t *conf = NULL;
+ struct switch_struct *trav = NULL;
+ struct switch_struct *prev = NULL;
+
+ conf = this->private;
+
+ if (conf) {
+ trav = (struct switch_struct *)conf->private;
+ conf->private = NULL;
+ while (trav) {
+ GF_FREE(trav->array);
+ prev = trav;
+ trav = trav->next;
+ GF_FREE(prev);
}
+ }
- dht_fini(this);
+ dht_fini(this);
}
int
-set_switch_pattern (xlator_t *this, dht_conf_t *conf,
- const char *pattern_str)
+set_switch_pattern(xlator_t *this, dht_conf_t *conf, const char *pattern_str)
{
- int flag = 0;
- int idx = 0;
- int index = 0;
- int child_count = 0;
- char *tmp = NULL;
- char *tmp1 = NULL;
- char *child = NULL;
- char *tmp_str = NULL;
- char *tmp_str1 = NULL;
- char *dup_str = NULL;
- char *dup_childs = NULL;
- char *switch_str = NULL;
- char *pattern = NULL;
- char *childs = NULL;
- char *option_string = NULL;
- struct switch_struct *switch_buf = NULL;
- struct switch_struct *switch_opt = NULL;
- struct switch_struct *trav = NULL;
- struct switch_sched_array *switch_buf_array = NULL;
- xlator_list_t *trav_xl = NULL;
-
- trav_xl = this->children;
- while (trav_xl) {
- index++;
- trav_xl = trav_xl->next;
+ int flag = 0;
+ int idx = 0;
+ int index = 0;
+ int child_count = 0;
+ char *tmp = NULL;
+ char *tmp1 = NULL;
+ char *child = NULL;
+ char *tmp_str = NULL;
+ char *tmp_str1 = NULL;
+ char *dup_str = NULL;
+ char *dup_childs = NULL;
+ char *switch_str = NULL;
+ char *pattern = NULL;
+ char *childs = NULL;
+ char *option_string = NULL;
+ size_t pattern_length;
+ struct switch_struct *switch_buf = NULL;
+ struct switch_struct *switch_opt = NULL;
+ struct switch_struct *trav = NULL;
+ struct switch_sched_array *switch_buf_array = NULL;
+ xlator_list_t *trav_xl = NULL;
+
+ trav_xl = this->children;
+ while (trav_xl) {
+ index++;
+ trav_xl = trav_xl->next;
+ }
+ child_count = index;
+ switch_buf_array = GF_CALLOC((index + 1), sizeof(struct switch_sched_array),
+ gf_switch_mt_switch_sched_array);
+ if (!switch_buf_array)
+ goto err;
+
+ trav_xl = this->children;
+ index = 0;
+
+ while (trav_xl) {
+ switch_buf_array[index].xl = trav_xl->xlator;
+ switch_buf_array[index].eligible = 1;
+ trav_xl = trav_xl->next;
+ index++;
+ }
+
+ /* *jpg:child1,child2;*mpg:child3;*:child4,child5,child6 */
+
+ /* Get the pattern for considering switch case.
+ "option block-size *avi:10MB" etc */
+ option_string = gf_strdup(pattern_str);
+ if (option_string == NULL) {
+ goto err;
+ }
+ switch_str = strtok_r(option_string, ";", &tmp_str);
+ while (switch_str) {
+ dup_str = gf_strdup(switch_str);
+ if (dup_str == NULL) {
+ goto err;
}
- child_count = index;
- switch_buf_array = GF_CALLOC ((index + 1),
- sizeof (struct switch_sched_array),
- gf_switch_mt_switch_sched_array);
- if (!switch_buf_array)
- goto err;
-
- trav_xl = this->children;
- index = 0;
-
- while (trav_xl) {
- switch_buf_array[index].xl = trav_xl->xlator;
- switch_buf_array[index].eligible = 1;
- trav_xl = trav_xl->next;
- index++;
+ switch_opt = GF_CALLOC(1, sizeof(struct switch_struct),
+ gf_switch_mt_switch_struct);
+ if (!switch_opt) {
+ GF_FREE(dup_str);
+ goto err;
}
- /* *jpg:child1,child2;*mpg:child3;*:child4,child5,child6 */
-
- /* Get the pattern for considering switch case.
- "option block-size *avi:10MB" etc */
- option_string = gf_strdup (pattern_str);
- switch_str = strtok_r (option_string, ";", &tmp_str);
- while (switch_str) {
- dup_str = gf_strdup (switch_str);
- switch_opt = GF_CALLOC (1, sizeof (struct switch_struct),
- gf_switch_mt_switch_struct);
- if (!switch_opt) {
- GF_FREE (dup_str);
- goto err;
- }
+ pattern = strtok_r(dup_str, ":", &tmp_str1);
+ childs = strtok_r(NULL, ":", &tmp_str1);
+ if (strncmp(pattern, "*", 2) == 0) {
+ gf_msg("switch", GF_LOG_INFO, 0, DHT_MSG_SWITCH_PATTERN_INFO,
+ "'*' pattern will be taken by default "
+ "for all the unconfigured child nodes,"
+ " hence neglecting current option");
+ switch_str = strtok_r(NULL, ";", &tmp_str);
+ GF_FREE(switch_opt);
+ switch_opt = NULL;
+ GF_FREE(dup_str);
+ continue;
+ }
+ GF_FREE(dup_str);
+
+ pattern_length = strlen(pattern);
+ if (pattern_length >= (sizeof(switch_opt->path_pattern))) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_SET_SWITCH_PATTERN_ERROR, "Pattern (%s) too long",
+ pattern);
+ goto err;
+ }
+ memcpy(switch_opt->path_pattern, pattern, pattern_length);
+ switch_opt->path_pattern[pattern_length] = '\0';
- pattern = strtok_r (dup_str, ":", &tmp_str1);
- childs = strtok_r (NULL, ":", &tmp_str1);
- if (strncmp (pattern, "*", 2) == 0) {
- gf_log ("switch", GF_LOG_INFO,
- "'*' pattern will be taken by default "
- "for all the unconfigured child nodes,"
- " hence neglecting current option");
- switch_str = strtok_r (NULL, ";", &tmp_str);
- GF_FREE (switch_opt);
- GF_FREE (dup_str);
- continue;
- }
- GF_FREE (dup_str);
- memcpy (switch_opt->path_pattern, pattern, strlen (pattern));
- if (childs) {
- dup_childs = gf_strdup (childs);
- child = strtok_r (dup_childs, ",", &tmp);
- while (child) {
- if (gf_switch_valid_child (this, child)) {
- idx++;
- child = strtok_r (NULL, ",", &tmp);
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "%s is not a subvolume of %s. "
- "pattern can only be scheduled "
- "only to a subvolume of %s",
- child, this->name, this->name);
- goto err;
- }
- }
- GF_FREE (dup_childs);
- child = strtok_r (childs, ",", &tmp1);
- switch_opt->num_child = idx;
- switch_opt->array = GF_CALLOC (1, (idx *
- sizeof (struct switch_sched_array)),
- gf_switch_mt_switch_sched_array);
- if (!switch_opt->array)
- goto err;
- idx = 0;
- while (child) {
- for (index = 0; index < child_count; index++) {
- if (strcmp (switch_buf_array[index].xl->name,
- child) == 0) {
- gf_msg_debug ("switch", 0,
- "'%s' pattern will be "
- "scheduled to \"%s\"",
- switch_opt->path_pattern, child);
- /*
- if (switch_buf_array[index-1].considered) {
- gf_msg_debug ("switch", 0,
- "ambiguity found, exiting");
- return -1;
- }
- */
- switch_opt->array[idx].xl = switch_buf_array[index].xl;
- switch_buf_array[index].considered = 1;
- idx++;
- break;
- }
- }
- child = strtok_r (NULL, ",", &tmp1);
- }
+ if (childs) {
+ dup_childs = gf_strdup(childs);
+ if (dup_childs == NULL) {
+ goto err;
+ }
+ child = strtok_r(dup_childs, ",", &tmp);
+ while (child) {
+ if (gf_switch_valid_child(this, child)) {
+ idx++;
+ child = strtok_r(NULL, ",", &tmp);
} else {
- /* error */
- gf_log ("switch", GF_LOG_ERROR,
- "Check \"scheduler.switch.case\" "
- "option in unify volume. Exiting");
- goto err;
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SUBVOL_ERROR,
+ "%s is not a subvolume of %s. "
+ "pattern can only be scheduled "
+ "only to a subvolume of %s",
+ child, this->name, this->name);
+ GF_FREE(dup_childs);
+ goto err;
}
-
- /* Link it to the main structure */
- if (switch_buf) {
- /* there are already few entries */
- trav = switch_buf;
- while (trav->next)
- trav = trav->next;
- trav->next = switch_opt;
- } else {
- /* First entry */
- switch_buf = switch_opt;
+ }
+ GF_FREE(dup_childs);
+ child = strtok_r(childs, ",", &tmp1);
+ switch_opt->num_child = idx;
+ switch_opt->array = GF_CALLOC(
+ 1, (idx * sizeof(struct switch_sched_array)),
+ gf_switch_mt_switch_sched_array);
+ if (!switch_opt->array)
+ goto err;
+ idx = 0;
+ while (child) {
+ for (index = 0; index < child_count; index++) {
+ if (strcmp(switch_buf_array[index].xl->name, child) == 0) {
+ gf_msg_debug("switch", 0,
+ "'%s' pattern will be "
+ "scheduled to \"%s\"",
+ switch_opt->path_pattern, child);
+ /*
+ if (switch_buf_array[index-1].considered) {
+ gf_msg_debug ("switch", 0,
+ "ambiguity found, exiting");
+ return -1;
+ }
+ */
+ switch_opt->array[idx].xl = switch_buf_array[index].xl;
+ switch_buf_array[index].considered = 1;
+ idx++;
+ break;
+ }
}
- switch_opt = NULL;
- switch_str = strtok_r (NULL, ";", &tmp_str);
+ child = strtok_r(NULL, ",", &tmp1);
+ }
+ } else {
+ /* error */
+ gf_msg("switch", GF_LOG_ERROR, 0, DHT_MSG_SET_SWITCH_PATTERN_ERROR,
+ "Check \"scheduler.switch.case\" "
+ "option in unify volume. Exiting");
+ goto err;
}
- /* Now, all the pattern based considerations done, so for all the
- * remaining pattern, '*' to all the remaining child nodes
- */
- {
- for (index=0; index < child_count; index++) {
- /* check for considered flag */
- if (switch_buf_array[index].considered)
- continue;
- flag++;
- }
- if (!flag) {
- gf_log ("switch", GF_LOG_ERROR,
- "No nodes left for pattern '*'. Exiting");
- goto err;
- }
- switch_opt = GF_CALLOC (1, sizeof (struct switch_struct),
- gf_switch_mt_switch_struct);
- if (!switch_opt)
- goto err;
-
- /* Add the '*' pattern to the array */
- memcpy (switch_opt->path_pattern, "*", 2);
- switch_opt->num_child = flag;
- switch_opt->array =
- GF_CALLOC (1,
- flag * sizeof (struct switch_sched_array),
- gf_switch_mt_switch_sched_array);
- if (!switch_opt->array)
- goto err;
- flag = 0;
- for (index=0; index < child_count; index++) {
- /* check for considered flag */
- if (switch_buf_array[index].considered)
- continue;
- gf_msg_debug ("switch", 0, "'%s'"
- " pattern will be scheduled to \"%s\"",
- switch_opt->path_pattern,
- switch_buf_array[index].xl->name);
-
- switch_opt->array[flag].xl =
- switch_buf_array[index].xl;
- switch_buf_array[index].considered = 1;
- flag++;
- }
- if (switch_buf) {
- /* there are already few entries */
- trav = switch_buf;
- while (trav->next)
- trav = trav->next;
- trav->next = switch_opt;
- } else {
- /* First entry */
- switch_buf = switch_opt;
- }
- switch_opt = NULL;
+ /* Link it to the main structure */
+ if (switch_buf) {
+ /* there are already few entries */
+ trav = switch_buf;
+ while (trav->next)
+ trav = trav->next;
+ trav->next = switch_opt;
+ } else {
+ /* First entry */
+ switch_buf = switch_opt;
+ }
+ switch_opt = NULL;
+ switch_str = strtok_r(NULL, ";", &tmp_str);
+ }
+
+ /* Now, all the pattern based considerations done, so for all the
+ * remaining pattern, '*' to all the remaining child nodes
+ */
+ {
+ for (index = 0; index < child_count; index++) {
+ /* check for considered flag */
+ if (switch_buf_array[index].considered)
+ continue;
+ flag++;
+ }
+ if (!flag) {
+ gf_msg("switch", GF_LOG_ERROR, 0, DHT_MSG_SET_SWITCH_PATTERN_ERROR,
+ "No nodes left for pattern '*'. Exiting");
+ goto err;
+ }
+ switch_opt = GF_CALLOC(1, sizeof(struct switch_struct),
+ gf_switch_mt_switch_struct);
+ if (!switch_opt)
+ goto err;
+
+ /* Add the '*' pattern to the array */
+ memcpy(switch_opt->path_pattern, "*", 2);
+ switch_opt->num_child = flag;
+ switch_opt->array = GF_CALLOC(1,
+ flag * sizeof(struct switch_sched_array),
+ gf_switch_mt_switch_sched_array);
+ if (!switch_opt->array)
+ goto err;
+ flag = 0;
+ for (index = 0; index < child_count; index++) {
+ /* check for considered flag */
+ if (switch_buf_array[index].considered)
+ continue;
+ gf_msg_debug("switch", 0,
+ "'%s'"
+ " pattern will be scheduled to \"%s\"",
+ switch_opt->path_pattern,
+ switch_buf_array[index].xl->name);
+
+ switch_opt->array[flag].xl = switch_buf_array[index].xl;
+ switch_buf_array[index].considered = 1;
+ flag++;
+ }
+ if (switch_buf) {
+ /* there are already few entries */
+ trav = switch_buf;
+ while (trav->next)
+ trav = trav->next;
+ trav->next = switch_opt;
+ } else {
+ /* First entry */
+ switch_buf = switch_opt;
}
- /* */
- conf->private = switch_buf;
+ switch_opt = NULL;
+ }
+ /* */
+ conf->private = switch_buf;
- return 0;
+ GF_FREE(option_string);
+ return 0;
err:
- GF_FREE (switch_buf_array);
- GF_FREE (switch_opt);
+ GF_FREE(switch_buf_array);
+ GF_FREE(switch_opt);
+ GF_FREE(option_string);
- if (switch_buf) {
- trav = switch_buf;
- while (trav) {
- GF_FREE (trav->array);
- switch_opt = trav;
- trav = trav->next;
- GF_FREE (switch_opt);
- }
+ if (switch_buf) {
+ trav = switch_buf;
+ while (trav) {
+ GF_FREE(trav->array);
+ switch_opt = trav;
+ trav = trav->next;
+ GF_FREE(switch_opt);
}
- return -1;
+ }
+ return -1;
}
-
int32_t
-switch_init (xlator_t *this)
+switch_init(xlator_t *this)
{
- dht_conf_t *conf = NULL;
- data_t *data = NULL;
- int ret = -1;
+ dht_conf_t *conf = NULL;
+ data_t *data = NULL;
+ int ret = -1;
+
+ ret = dht_init(this);
+ if (ret) {
+ return ret;
+ }
+ conf = this->private;
- ret = dht_init(this);
+ data = dict_get(this->options, "pattern.switch.case");
+ if (data) {
+ /* TODO: */
+ ret = set_switch_pattern(this, conf, data->data);
if (ret) {
- return ret;
- }
- conf = this->private;
-
- data = dict_get (this->options, "pattern.switch.case");
- if (data) {
- /* TODO: */
- ret = set_switch_pattern (this, conf, data->data);
- if (ret) {
- goto err;
- }
+ goto err;
}
+ }
- this->private = conf;
- return 0;
+ this->private = conf;
+ return 0;
err:
- dht_fini(this);
- return -1;
+ dht_fini(this);
+ return -1;
}
-
-class_methods_t class_methods = {
- .init = switch_init,
- .fini = switch_fini,
- .reconfigure = dht_reconfigure,
- .notify = dht_notify
-};
-
-
struct xlator_fops fops = {
- .lookup = switch_lookup,
- .create = switch_create,
- .mknod = switch_mknod,
-
- .stat = dht_stat,
- .fstat = dht_fstat,
- .truncate = dht_truncate,
- .ftruncate = dht_ftruncate,
- .access = dht_access,
- .readlink = dht_readlink,
- .setxattr = dht_setxattr,
- .getxattr = dht_getxattr,
- .removexattr = dht_removexattr,
- .open = dht_open,
- .readv = dht_readv,
- .writev = dht_writev,
- .flush = dht_flush,
- .fsync = dht_fsync,
- .statfs = dht_statfs,
- .lk = dht_lk,
- .opendir = dht_opendir,
- .readdir = dht_readdir,
- .readdirp = dht_readdirp,
- .fsyncdir = dht_fsyncdir,
- .symlink = dht_symlink,
- .unlink = dht_unlink,
- .link = dht_link,
- .mkdir = dht_mkdir,
- .rmdir = dht_rmdir,
- .rename = dht_rename,
- .inodelk = dht_inodelk,
- .finodelk = dht_finodelk,
- .entrylk = dht_entrylk,
- .fentrylk = dht_fentrylk,
- .xattrop = dht_xattrop,
- .fxattrop = dht_fxattrop,
- .setattr = dht_setattr,
+ .lookup = switch_lookup,
+ .create = switch_create,
+ .mknod = switch_mknod,
+
+ .stat = dht_stat,
+ .fstat = dht_fstat,
+ .truncate = dht_truncate,
+ .ftruncate = dht_ftruncate,
+ .access = dht_access,
+ .readlink = dht_readlink,
+ .setxattr = dht_setxattr,
+ .getxattr = dht_getxattr,
+ .removexattr = dht_removexattr,
+ .open = dht_open,
+ .readv = dht_readv,
+ .writev = dht_writev,
+ .flush = dht_flush,
+ .fsync = dht_fsync,
+ .statfs = dht_statfs,
+ .lk = dht_lk,
+ .opendir = dht_opendir,
+ .readdir = dht_readdir,
+ .readdirp = dht_readdirp,
+ .fsyncdir = dht_fsyncdir,
+ .symlink = dht_symlink,
+ .unlink = dht_unlink,
+ .link = dht_link,
+ .mkdir = dht_mkdir,
+ .rmdir = dht_rmdir,
+ .rename = dht_rename,
+ .inodelk = dht_inodelk,
+ .finodelk = dht_finodelk,
+ .entrylk = dht_entrylk,
+ .fentrylk = dht_fentrylk,
+ .xattrop = dht_xattrop,
+ .fxattrop = dht_fxattrop,
+ .setattr = dht_setattr,
};
-
-struct xlator_cbks cbks = {
- .forget = dht_forget
+struct xlator_cbks cbks = {.forget = dht_forget};
+extern int32_t
+mem_acct_init(xlator_t *this);
+
+xlator_api_t xlator_api = {
+ .init = switch_init,
+ .fini = switch_fini,
+ .notify = dht_notify,
+ .reconfigure = dht_reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = dht_options,
+ .identifier = "switch",
+ .category = GF_TECH_PREVIEW,
};
diff --git a/xlators/cluster/dht/src/tier.c b/xlators/cluster/dht/src/tier.c
deleted file mode 100644
index 8180d77787d..00000000000
--- a/xlators/cluster/dht/src/tier.c
+++ /dev/null
@@ -1,1222 +0,0 @@
-/*
- Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-#include <dlfcn.h>
-
-#include "dht-common.h"
-#include "tier.h"
-
-/*Hard coded DB info*/
-static gfdb_db_type_t dht_tier_db_type = GFDB_SQLITE3;
-/*Hard coded DB info*/
-
-/*Mutex for updating the data movement stats*/
-static pthread_mutex_t dm_stat_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-static char *promotion_qfile;
-static char *demotion_qfile;
-
-static void *libhandle;
-static gfdb_methods_t gfdb_methods;
-
-#define DB_QUERY_RECORD_SIZE 4096
-
-
-
-static int
-tier_parse_query_str (char *query_record_str,
- char *gfid, char *link_buffer, ssize_t *link_size)
-{
- char *token_str = NULL;
- char *delimiter = "|";
- char *saveptr = NULL;
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO ("tier", query_record_str, out);
- GF_VALIDATE_OR_GOTO ("tier", gfid, out);
- GF_VALIDATE_OR_GOTO ("tier", link_buffer, out);
- GF_VALIDATE_OR_GOTO ("tier", link_size, out);
-
- token_str = strtok_r (query_record_str, delimiter, &saveptr);
- if (!token_str)
- goto out;
-
- strcpy (gfid, token_str);
-
-
- token_str = strtok_r (NULL, delimiter, &saveptr);
- if (!token_str)
- goto out;
-
- strcpy (link_buffer, token_str);
-
- token_str = strtok_r (NULL, delimiter, &saveptr);
- if (!token_str)
- goto out;
-
- *link_size = atoi (token_str);
-
- ret = 0;
-out:
- return ret;
-}
-
-static int
-tier_check_same_node (xlator_t *this, loc_t *loc, gf_defrag_info_t *defrag)
-{
- int ret = -1;
- dict_t *dict = NULL;
- char *uuid_str = NULL;
- uuid_t node_uuid = {0,};
-
- GF_VALIDATE_OR_GOTO ("tier", this, out);
- GF_VALIDATE_OR_GOTO (this->name, loc, out);
- GF_VALIDATE_OR_GOTO (this->name, defrag, out);
-
- if (syncop_getxattr (this, loc, &dict, GF_XATTR_NODE_UUID_KEY,
- NULL, NULL)) {
- gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Unable to get NODE_UUID_KEY %s %s\n",
- loc->name, loc->path);
- goto out;
- }
-
- if (dict_get_str (dict, GF_XATTR_NODE_UUID_KEY, &uuid_str) < 0) {
- gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to get node-uuid for %s", loc->path);
- goto out;
- }
-
- if (gf_uuid_parse (uuid_str, node_uuid)) {
- gf_msg (this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_ERROR,
- "uuid_parse failed for %s", loc->path);
- goto out;
- }
-
- if (gf_uuid_compare (node_uuid, defrag->node_uuid)) {
- gf_msg (this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "%s does not belong to this node", loc->path);
- goto out;
- }
-
- ret = 0;
-out:
- if (dict)
- dict_unref(dict);
-
- return ret;
-}
-
-static int
-tier_migrate_using_query_file (void *_args)
-{
- int ret = -1;
- char gfid_str[UUID_CANONICAL_FORM_LEN+1] = "";
- char query_record_str[4096] = "";
- query_cbk_args_t *query_cbk_args = (query_cbk_args_t *) _args;
- xlator_t *this = NULL;
- gf_defrag_info_t *defrag = NULL;
- char *token_str = NULL;
- char *delimiter = "::";
- char *link_buffer = NULL;
- gfdb_query_record_t *query_record = NULL;
- gfdb_link_info_t *link_info = NULL;
- struct iatt par_stbuf = {0,};
- struct iatt current = {0,};
- loc_t p_loc = {0,};
- loc_t loc = {0,};
- dict_t *migrate_data = NULL;
- inode_t *linked_inode = NULL;
- int per_file_status = 0;
- int per_link_status = 0;
- int total_status = 0;
- FILE *queryFILE = NULL;
- char *link_str = NULL;
- xlator_t *src_subvol = NULL;
- dht_conf_t *conf = NULL;
-
- GF_VALIDATE_OR_GOTO ("tier", query_cbk_args, out);
- GF_VALIDATE_OR_GOTO ("tier", query_cbk_args->this, out);
- this = query_cbk_args->this;
- GF_VALIDATE_OR_GOTO (this->name, query_cbk_args->defrag, out);
- GF_VALIDATE_OR_GOTO (this->name, query_cbk_args->queryFILE, out);
- GF_VALIDATE_OR_GOTO (this->name, this->private, out);
-
- conf = this->private;
-
- defrag = query_cbk_args->defrag;
-
- queryFILE = query_cbk_args->queryFILE;
-
- query_record = gfdb_query_record_init();
- if (!query_record) {
- goto out;
- }
-
- query_record->_link_info_str = GF_CALLOC (1, DB_QUERY_RECORD_SIZE,
- gf_common_mt_char);
- if (!query_record->_link_info_str) {
- goto out;
- }
- link_buffer = query_record->_link_info_str;
-
- link_info = gfdb_link_info_init ();
-
- migrate_data = dict_new ();
- if (!migrate_data)
- goto out;
-
- /* Per file */
- while (fscanf (queryFILE, "%s", query_record_str) != EOF) {
-
- per_file_status = 0;
- per_link_status = 0;
-
- memset (gfid_str, 0, UUID_CANONICAL_FORM_LEN+1);
- memset (query_record->_link_info_str, 0, DB_QUERY_RECORD_SIZE);
-
- if (tier_parse_query_str (query_record_str, gfid_str,
- link_buffer,
- &query_record->link_info_size)) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_LOG_TIER_ERROR,
- "failed parsing %s\n", query_record_str);
- continue;
- }
-
- gf_uuid_parse (gfid_str, query_record->gfid);
-
- if (dict_get(migrate_data, GF_XATTR_FILE_MIGRATE_KEY))
- dict_del(migrate_data, GF_XATTR_FILE_MIGRATE_KEY);
-
- if (dict_get(migrate_data, "from.migrator"))
- dict_del(migrate_data, "from.migrator");
-
- token_str = strtok (link_buffer, delimiter);
- if (token_str != NULL) {
- per_file_status =
- dict_set_str (migrate_data,
- GF_XATTR_FILE_MIGRATE_KEY,
- "force");
- if (per_file_status) {
- goto per_file_out;
- }
-
- /* Flag to suggest the xattr call is from migrator */
- per_file_status = dict_set_str (migrate_data,
- "from.migrator", "yes");
- if (per_file_status) {
- goto per_file_out;
- }
- }
- per_link_status = 0;
- /* Per link of file */
- while (token_str != NULL) {
-
- link_str = gf_strdup (token_str);
-
- if (!link_info) {
- per_link_status = -1;
- goto per_file_out;
- }
-
- memset (link_info, 0, sizeof(gfdb_link_info_t));
-
- ret = str_to_link_info (link_str, link_info);
- if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_LOG_TIER_ERROR,
- "failed parsing %s\n", link_str);
- per_link_status = -1;
- goto error;
- }
-
- gf_uuid_copy (p_loc.gfid, link_info->pargfid);
-
- p_loc.inode = inode_new (defrag->root_inode->table);
- if (!p_loc.inode) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_LOG_TIER_ERROR,
- "failed parsing %s\n", link_str);
- per_link_status = -1;
- goto error;
- }
-
- ret = syncop_lookup (this, &p_loc, &par_stbuf, NULL,
- NULL, NULL);
- if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_LOG_TIER_ERROR,
- " ERROR in parent lookup\n");
- per_link_status = -1;
- goto error;
- }
- linked_inode = inode_link (p_loc.inode, NULL, NULL,
- &par_stbuf);
- inode_unref (p_loc.inode);
- p_loc.inode = linked_inode;
-
- gf_uuid_copy (loc.gfid, query_record->gfid);
- loc.inode = inode_new (defrag->root_inode->table);
- gf_uuid_copy (loc.pargfid, link_info->pargfid);
- loc.parent = inode_ref(p_loc.inode);
-
- loc.name = gf_strdup (link_info->file_name);
- if (!loc.name) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_LOG_TIER_ERROR, "ERROR in "
- "memory allocation\n");
- per_link_status = -1;
- goto error;
- }
-
- loc.path = gf_strdup (link_info->file_path);
- if (!loc.path) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_LOG_TIER_ERROR, "ERROR in "
- "memory allocation\n");
- per_link_status = -1;
- goto error;
- }
-
- gf_uuid_copy (loc.parent->gfid, link_info->pargfid);
-
- ret = syncop_lookup (this, &loc, &current, NULL,
- NULL, NULL);
- if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_LOG_TIER_ERROR, "ERROR in "
- "current lookup\n");
- per_link_status = -1;
- goto error;
- }
- linked_inode = inode_link (loc.inode, NULL, NULL,
- &current);
- inode_unref (loc.inode);
- loc.inode = linked_inode;
-
- /*
- * Do not promote/demote if file already is where it
- * should be. This shall become a skipped count.
- */
- src_subvol = dht_subvol_get_cached(this, loc.inode);
-
- if (query_cbk_args->is_promotion &&
- src_subvol == conf->subvolumes[1]) {
- per_link_status = -1;
- goto error;
- }
-
- if (!query_cbk_args->is_promotion &&
- src_subvol == conf->subvolumes[0]) {
- per_link_status = -1;
- goto error;
- }
-
- gf_msg (this->name, GF_LOG_INFO, 0,
- DHT_MSG_LOG_TIER_STATUS, "Tier %d"
- " src_subvol %s file %s",
- query_cbk_args->is_promotion,
- src_subvol->name,
- loc.name);
-
- if (tier_check_same_node (this, &loc, defrag)) {
- per_link_status = -1;
- goto error;
- }
-
- ret = syncop_setxattr (this, &loc, migrate_data, 0,
- NULL, NULL);
- if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_LOG_TIER_ERROR, "ERROR %d in "
- "current migration %s %s\n", ret,
- loc.name,
- loc.path);
- per_link_status = -1;
- goto error;
- }
-error:
-
- loc_wipe(&loc);
- loc_wipe(&p_loc);
-
- token_str = NULL;
- token_str = strtok (NULL, delimiter);
- GF_FREE (link_str);
- }
- per_file_status = per_link_status;
-per_file_out:
- if (per_file_status) {
- pthread_mutex_lock (&dm_stat_mutex);
- defrag->total_failures++;
- pthread_mutex_unlock (&dm_stat_mutex);
- } else {
- pthread_mutex_lock (&dm_stat_mutex);
- defrag->total_files++;
- pthread_mutex_unlock (&dm_stat_mutex);
- }
- total_status = total_status + per_file_status;
- per_link_status = 0;
- per_file_status = 0;
- query_record_str[0] = '\0';
- }
-
-out:
- if (link_buffer)
- GF_FREE (link_buffer);
- gfdb_link_info_fini (&link_info);
- if (migrate_data)
- dict_unref (migrate_data);
- gfdb_query_record_fini (&query_record);
- return total_status;
-}
-
-
-/*This is the call back function per record/file from data base*/
-static int
-tier_gf_query_callback (gfdb_query_record_t *gfdb_query_record,
- void *_args) {
- int ret = -1;
- char gfid_str[UUID_CANONICAL_FORM_LEN+1] = "";
- query_cbk_args_t *query_cbk_args = _args;
-
- GF_VALIDATE_OR_GOTO ("tier", query_cbk_args, out);
- GF_VALIDATE_OR_GOTO ("tier", query_cbk_args->defrag, out);
- GF_VALIDATE_OR_GOTO ("tier", query_cbk_args->queryFILE, out);
-
- gf_uuid_unparse (gfdb_query_record->gfid, gfid_str);
- fprintf (query_cbk_args->queryFILE, "%s|%s|%ld\n", gfid_str,
- gfdb_query_record->_link_info_str,
- gfdb_query_record->link_info_size);
-
- pthread_mutex_lock (&dm_stat_mutex);
- query_cbk_args->defrag->num_files_lookedup++;
- pthread_mutex_unlock (&dm_stat_mutex);
-
- ret = 0;
-out:
- return ret;
-}
-
-/*This is the call back function for each brick from hot/cold bricklist
- * It picks up each bricks db and queries for eligible files for migration.
- * The list of eligible files are populated in appropriate query files*/
-static int
-tier_process_brick_cbk (dict_t *brick_dict, char *key, data_t *value,
- void *args) {
- int ret = -1;
- char *db_path = NULL;
- query_cbk_args_t *query_cbk_args = NULL;
- xlator_t *this = NULL;
- gfdb_conn_node_t *conn_node = NULL;
- dict_t *params_dict = NULL;
- _gfdb_brick_dict_info_t *gfdb_brick_dict_info = args;
-
- /*Init of all the essentials*/
- GF_VALIDATE_OR_GOTO ("tier", gfdb_brick_dict_info , out);
- query_cbk_args = gfdb_brick_dict_info->_query_cbk_args;
-
- GF_VALIDATE_OR_GOTO ("tier", query_cbk_args->this, out);
- this = query_cbk_args->this;
-
- GF_VALIDATE_OR_GOTO (this->name,
- gfdb_brick_dict_info->_query_cbk_args, out);
-
- GF_VALIDATE_OR_GOTO (this->name, value, out);
- db_path = data_to_str(value);
-
- /*Preparing DB parameters before init_db i.e getting db connection*/
- params_dict = dict_new ();
- if (!params_dict) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_LOG_TIER_ERROR,
- "DB Params cannot initialized!");
- goto out;
- }
- SET_DB_PARAM_TO_DICT(this->name, params_dict, gfdb_methods.dbpath,
- db_path, ret, out);
-
- /*Get the db connection*/
- conn_node = gfdb_methods.init_db((void *)params_dict, dht_tier_db_type);
- if (!conn_node) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_LOG_TIER_ERROR,
- "FATAL: Failed initializing db operations");
- goto out;
- }
-
- /*Query for eligible files from db*/
- query_cbk_args->queryFILE = fopen(GET_QFILE_PATH
- (gfdb_brick_dict_info->_gfdb_promote), "a+");
- if (!query_cbk_args->queryFILE) {
- gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to open query file %s:%s",
- GET_QFILE_PATH
- (gfdb_brick_dict_info->_gfdb_promote),
- strerror(errno));
- goto out;
- }
- if (!gfdb_brick_dict_info->_gfdb_promote) {
- if (query_cbk_args->defrag->write_freq_threshold == 0 &&
- query_cbk_args->defrag->read_freq_threshold == 0) {
- ret = gfdb_methods.find_unchanged_for_time (
- conn_node,
- tier_gf_query_callback,
- (void *)query_cbk_args,
- gfdb_brick_dict_info->time_stamp);
- } else {
- ret = gfdb_methods.find_unchanged_for_time_freq (
- conn_node,
- tier_gf_query_callback,
- (void *)query_cbk_args,
- gfdb_brick_dict_info->time_stamp,
- query_cbk_args->defrag->
- write_freq_threshold,
- query_cbk_args->defrag->
- read_freq_threshold,
- _gf_false);
- }
- } else {
- if (query_cbk_args->defrag->write_freq_threshold == 0 &&
- query_cbk_args->defrag->read_freq_threshold == 0) {
- ret = gfdb_methods.find_recently_changed_files (
- conn_node,
- tier_gf_query_callback,
- (void *)query_cbk_args,
- gfdb_brick_dict_info->time_stamp);
- } else {
- ret = gfdb_methods.find_recently_changed_files_freq (
- conn_node,
- tier_gf_query_callback,
- (void *)query_cbk_args,
- gfdb_brick_dict_info->time_stamp,
- query_cbk_args->defrag->
- write_freq_threshold,
- query_cbk_args->defrag->read_freq_threshold,
- _gf_false);
- }
- }
- if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_LOG_TIER_ERROR,
- "FATAL: query from db failed");
- goto out;
- }
- ret = 0;
-out:
- if (query_cbk_args->queryFILE) {
- fclose (query_cbk_args->queryFILE);
- query_cbk_args->queryFILE = NULL;
- }
- gfdb_methods.fini_db (conn_node);
- return ret;
-}
-
-inline int
-tier_build_migration_qfile (demotion_args_t *args,
- query_cbk_args_t *query_cbk_args,
- gf_boolean_t is_promotion)
-{
- gfdb_time_t current_time;
- _gfdb_brick_dict_info_t gfdb_brick_dict_info;
- gfdb_time_t time_in_past;
- int ret = -1;
-
- /*
- * The first time this function is called, query file will
- * not exist on a given instance of running the migration daemon.
- * The remove call is optimistic and it is legal if it fails.
- */
-
- ret = remove (GET_QFILE_PATH (is_promotion));
- if (ret == -1) {
- gf_msg (args->this->name, GF_LOG_INFO, 0,
- DHT_MSG_LOG_TIER_STATUS,
- "Failed to remove %s",
- GET_QFILE_PATH (is_promotion));
- }
-
- time_in_past.tv_sec = args->freq_time;
- time_in_past.tv_usec = 0;
-
- ret = gettimeofday (&current_time, NULL);
- if (ret == -1) {
- gf_log (args->this->name, GF_LOG_ERROR,
- "Failed to get current timen");
- goto out;
- }
- time_in_past.tv_sec = current_time.tv_sec - time_in_past.tv_sec;
- time_in_past.tv_usec = current_time.tv_usec - time_in_past.tv_usec;
- gfdb_brick_dict_info.time_stamp = &time_in_past;
- gfdb_brick_dict_info._gfdb_promote = is_promotion;
- gfdb_brick_dict_info._query_cbk_args = query_cbk_args;
- ret = dict_foreach (args->brick_list, tier_process_brick_cbk,
- &gfdb_brick_dict_info);
- if (ret) {
- gf_log (args->this->name, GF_LOG_ERROR,
- "Brick query failedn");
- goto out;
- }
-out:
- return ret;
-}
-
-inline int
-tier_migrate_files_using_qfile (demotion_args_t *comp,
- query_cbk_args_t *query_cbk_args,
- char *qfile)
-{
- char renamed_file[PATH_MAX] = "";
- int ret = -1;
-
- query_cbk_args->queryFILE = fopen (qfile, "r");
- if (!query_cbk_args->queryFILE) {
- gf_log ("tier", GF_LOG_ERROR,
- "Failed opening %s for migration", qfile);
- goto out;
- }
- ret = tier_migrate_using_query_file ((void *)query_cbk_args);
- fclose (query_cbk_args->queryFILE);
- query_cbk_args->queryFILE = NULL;
- if (ret) {
- sprintf (renamed_file, "%s.err", qfile);
- rename (qfile, renamed_file);
- }
-out:
- return ret;
-}
-
-/*Demotion Thread*/
-static void *
-tier_demote (void *args)
-{
- int ret = -1;
- query_cbk_args_t query_cbk_args;
- demotion_args_t *demotion_args = args;
-
- GF_VALIDATE_OR_GOTO ("tier", demotion_args, out);
- GF_VALIDATE_OR_GOTO ("tier", demotion_args->this, out);
- GF_VALIDATE_OR_GOTO (demotion_args->this->name,
- demotion_args->brick_list, out);
- GF_VALIDATE_OR_GOTO (demotion_args->this->name,
- demotion_args->defrag, out);
-
- query_cbk_args.this = demotion_args->this;
- query_cbk_args.defrag = demotion_args->defrag;
- query_cbk_args.is_promotion = 0;
-
- /*Build the query file using bricklist*/
- ret = tier_build_migration_qfile(demotion_args, &query_cbk_args,
- _gf_false);
- if (ret)
- goto out;
-
- /* Migrate files using the query file */
- ret = tier_migrate_files_using_qfile (args,
- &query_cbk_args, demotion_qfile);
- if (ret)
- goto out;
-
-out:
- demotion_args->return_value = ret;
- return NULL;
-}
-
-
-/*Promotion Thread*/
-static void
-*tier_promote (void *args)
-{
- int ret = -1;
- query_cbk_args_t query_cbk_args;
- promotion_args_t *promotion_args = args;
-
- GF_VALIDATE_OR_GOTO ("tier", promotion_args->this, out);
- GF_VALIDATE_OR_GOTO (promotion_args->this->name,
- promotion_args->brick_list, out);
- GF_VALIDATE_OR_GOTO (promotion_args->this->name,
- promotion_args->defrag, out);
-
- query_cbk_args.this = promotion_args->this;
- query_cbk_args.defrag = promotion_args->defrag;
- query_cbk_args.is_promotion = 1;
-
- /*Build the query file using bricklist*/
- ret = tier_build_migration_qfile(promotion_args, &query_cbk_args,
- _gf_true);
- if (ret)
- goto out;
-
- /* Migrate files using the query file */
- ret = tier_migrate_files_using_qfile (args,
- &query_cbk_args,
- promotion_qfile);
- if (ret)
- goto out;
-
-out:
- promotion_args->return_value = ret;
- return NULL;
-}
-
-static int
-tier_get_bricklist (xlator_t *xl, dict_t *bricklist)
-{
- xlator_list_t *child = NULL;
- char *rv = NULL;
- char *rh = NULL;
- char localhost[256] = {0};
- char *db_path = NULL;
- char *brickname = NULL;
- char db_name[PATH_MAX] = "";
- int ret = 0;
-
- GF_VALIDATE_OR_GOTO ("tier", xl, out);
- GF_VALIDATE_OR_GOTO ("tier", bricklist, out);
-
- gethostname (localhost, sizeof (localhost));
-
- /*
- * This function obtains remote subvolumes and filters out only
- * those running on the same node as the tier daemon.
- */
- if (strcmp(xl->type, "protocol/client") == 0) {
- ret = dict_get_str(xl->options, "remote-host", &rh);
- if (ret < 0)
- goto out;
-
- if (gf_is_local_addr (rh)) {
-
- ret = dict_get_str(xl->options, "remote-subvolume",
- &rv);
- if (ret < 0)
- goto out;
- brickname = strrchr(rv, '/') + 1;
- snprintf(db_name, sizeof(db_name), "%s.db",
- brickname);
- db_path = GF_CALLOC (PATH_MAX, 1, gf_common_mt_char);
- if (!db_path) {
- gf_msg ("tier", GF_LOG_ERROR, 0,
- DHT_MSG_LOG_TIER_STATUS,
- "Failed to allocate memory for bricklist");
- goto out;
- }
-
- sprintf(db_path, "%s/%s/%s", rv,
- GF_HIDDEN_PATH,
- db_name);
- if (dict_add_dynstr_with_alloc(bricklist, "brick",
- db_path))
- goto out;
-
- ret = 0;
- goto out;
- }
- }
-
- for (child = xl->children; child; child = child->next) {
- ret = tier_get_bricklist(child->xlator, bricklist);
- }
-out:
- GF_FREE (db_path);
-
- return ret;
-}
-
-int
-tier_start (xlator_t *this, gf_defrag_info_t *defrag)
-{
- dict_t *bricklist_cold = NULL;
- dict_t *bricklist_hot = NULL;
- dht_conf_t *conf = NULL;
- int tick = 0;
- int next_demote = 0;
- int next_promote = 0;
- int freq_promote = 0;
- int freq_demote = 0;
- promotion_args_t promotion_args = { 0 };
- demotion_args_t demotion_args = { 0 };
- int ret_promotion = 0;
- int ret_demotion = 0;
- int ret = 0;
- pthread_t promote_thread;
- pthread_t demote_thread;
-
- conf = this->private;
-
- bricklist_cold = dict_new();
- if (!bricklist_cold)
- return -1;
-
- bricklist_hot = dict_new();
- if (!bricklist_hot)
- return -1;
-
- tier_get_bricklist (conf->subvolumes[0], bricklist_cold);
- tier_get_bricklist (conf->subvolumes[1], bricklist_hot);
-
- freq_promote = defrag->tier_promote_frequency;
- freq_demote = defrag->tier_demote_frequency;
-
- next_promote = defrag->tier_promote_frequency % TIMER_SECS;
- next_demote = defrag->tier_demote_frequency % TIMER_SECS;
-
-
- gf_msg (this->name, GF_LOG_INFO, 0,
- DHT_MSG_LOG_TIER_STATUS, "Begin run tier promote %d demote %d",
- next_promote, next_demote);
-
- defrag->defrag_status = GF_DEFRAG_STATUS_STARTED;
-
- while (1) {
-
- sleep(1);
-
- ret_promotion = -1;
- ret_demotion = -1;
-
- if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
- ret = 1;
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_LOG_TIER_ERROR,
- "defrag->defrag_status != "
- "GF_DEFRAG_STATUS_STARTED");
- goto out;
- }
-
- if (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER) {
- ret = 1;
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_LOG_TIER_ERROR,
- "defrag->defrag_cmd == "
- "GF_DEFRAG_CMD_START_DETACH_TIER");
- goto out;
- }
-
- tick = (tick + 1) % TIMER_SECS;
- if ((next_demote != tick) && (next_promote != tick))
- continue;
-
- if (next_demote >= tick) {
- demotion_args.this = this;
- demotion_args.brick_list = bricklist_hot;
- demotion_args.defrag = defrag;
- demotion_args.freq_time = freq_demote;
- ret_demotion = pthread_create (&demote_thread, NULL,
- &tier_demote, &demotion_args);
- if (ret_demotion) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_LOG_TIER_ERROR,
- "Failed starting Demotion thread!");
- }
- freq_demote = defrag->tier_demote_frequency;
- next_demote = (tick + freq_demote) % TIMER_SECS;
- }
-
- if (next_promote >= tick) {
- promotion_args.this = this;
- promotion_args.brick_list = bricklist_cold;
- promotion_args.defrag = defrag;
- promotion_args.freq_time = freq_promote;
- ret_promotion = pthread_create (&promote_thread, NULL,
- &tier_promote, &promotion_args);
- if (ret_promotion) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_LOG_TIER_ERROR,
- "Failed starting Promotion thread!");
- }
- freq_promote = defrag->tier_promote_frequency;
- next_promote = (tick + freq_promote) % TIMER_SECS;
- }
-
- if (ret_demotion == 0) {
- pthread_join (demote_thread, NULL);
- if (demotion_args.return_value) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_LOG_TIER_ERROR,
- "Demotion failed!");
- }
- ret_demotion = demotion_args.return_value;
- }
-
- if (ret_promotion == 0) {
- pthread_join (promote_thread, NULL);
- if (promotion_args.return_value) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_LOG_TIER_ERROR,
- "Promotion failed!");
- }
- ret_promotion = promotion_args.return_value;
- }
-
- /*Collect previous and current cummulative status */
- ret = ret | ret_demotion | ret_promotion;
-
- /*reseting promotion and demotion arguments for next iteration*/
- memset (&demotion_args, 0, sizeof(demotion_args_t));
- memset (&promotion_args, 0, sizeof(promotion_args_t));
-
- }
-
- ret = 0;
-out:
-
- dict_unref(bricklist_cold);
- dict_unref(bricklist_hot);
-
- return ret;
-}
-
-int32_t
-tier_migration_needed (xlator_t *this)
-{
- gf_defrag_info_t *defrag = NULL;
- dht_conf_t *conf = NULL;
- int ret = 0;
-
- conf = this->private;
-
- GF_VALIDATE_OR_GOTO (this->name, conf, out);
- GF_VALIDATE_OR_GOTO (this->name, conf->defrag, out);
-
- defrag = conf->defrag;
-
- if (defrag->cmd == GF_DEFRAG_CMD_START_TIER)
- ret = 1;
-out:
- return ret;
-}
-
-int32_t
-tier_migration_get_dst (xlator_t *this, dht_local_t *local)
-{
- dht_conf_t *conf = NULL;
- int32_t ret = -1;
- gf_defrag_info_t *defrag = NULL;
-
- GF_VALIDATE_OR_GOTO("tier", this, out);
- GF_VALIDATE_OR_GOTO(this->name, this->private, out);
-
- conf = this->private;
-
- defrag = conf->defrag;
-
- if (defrag && defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER) {
- local->rebalance.target_node = conf->subvolumes[0];
-
- } else if (conf->subvolumes[0] == local->cached_subvol)
- local->rebalance.target_node =
- conf->subvolumes[1];
- else
- local->rebalance.target_node =
- conf->subvolumes[0];
-
- if (local->rebalance.target_node)
- ret = 0;
-
-out:
- return ret;
-}
-
-xlator_t *
-tier_search (xlator_t *this, dht_layout_t *layout, const char *name)
-{
- xlator_t *subvol = NULL;
- void *value;
- int search_first_subvol = 0;
- dht_conf_t *conf = NULL;
- gf_defrag_info_t *defrag = NULL;
-
- GF_VALIDATE_OR_GOTO("tier", this, out);
- GF_VALIDATE_OR_GOTO(this->name, layout, out);
- GF_VALIDATE_OR_GOTO(this->name, name, out);
- GF_VALIDATE_OR_GOTO(this->name, this->private, out);
-
- conf = this->private;
-
- defrag = conf->defrag;
- if (defrag && defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER)
- search_first_subvol = 1;
-
- else if (!dict_get_ptr (this->options, "rule", &value) &&
- !strcmp(layout->list[0].xlator->name, value)) {
- search_first_subvol = 1;
- }
-
- if (search_first_subvol)
- subvol = layout->list[0].xlator;
- else
- subvol = layout->list[1].xlator;
-
-out:
- return subvol;
-}
-
-
-dht_methods_t tier_methods = {
- .migration_get_dst_subvol = tier_migration_get_dst,
- .migration_other = tier_start,
- .migration_needed = tier_migration_needed,
- .layout_search = tier_search,
-};
-
-static int
-tier_load_externals (xlator_t *this)
-{
- int ret = -1;
- char *libpathfull = (LIBDIR "/libgfdb.so." LIBGFDB_VERSION);
- get_gfdb_methods_t get_gfdb_methods;
-
- GF_VALIDATE_OR_GOTO("this", this, out);
-
- libhandle = dlopen (libpathfull, RTLD_NOW);
- if (!libhandle) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- DHT_MSG_LOG_TIER_ERROR,
- "Error loading libgfdb.so %s\n", dlerror());
- ret = -1;
- goto out;
- }
-
- get_gfdb_methods = dlsym (libhandle, "get_gfdb_methods");
- if (!get_gfdb_methods) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- DHT_MSG_LOG_TIER_ERROR,
- "Error loading get_gfdb_methods()");
- ret = -1;
- goto out;
- }
-
- get_gfdb_methods (&gfdb_methods);
-
- ret = 0;
-
-out:
- if (ret && libhandle)
- dlclose (libhandle);
-
- return ret;
-}
-
-int
-tier_init (xlator_t *this)
-{
- int ret = -1;
- int freq = 0;
- dht_conf_t *conf = NULL;
- gf_defrag_info_t *defrag = NULL;
-
- ret = dht_init(this);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- DHT_MSG_LOG_TIER_ERROR,
- "dht_init failed");
- goto out;
- }
-
- conf = this->private;
-
- conf->methods = &tier_methods;
-
- if (conf->subvolume_cnt != 2) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- DHT_MSG_LOG_TIER_ERROR,
- "Invalid number of subvolumes %d", conf->subvolume_cnt);
- goto out;
- }
-
- /* if instatiated from client side initialization is complete. */
- if (!conf->defrag) {
- ret = 0;
- goto out;
- }
-
- /* if instatiated from server side, load db libraries */
- ret = tier_load_externals(this);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- DHT_MSG_LOG_TIER_ERROR,
- "Could not load externals. Aborting");
- goto out;
- }
-
- defrag = conf->defrag;
-
- ret = dict_get_int32 (this->options,
- "tier-promote-frequency", &freq);
- if (ret) {
- freq = DEFAULT_PROMOTE_FREQ_SEC;
- }
-
- defrag->tier_promote_frequency = freq;
-
- ret = dict_get_int32 (this->options,
- "tier-demote-frequency", &freq);
- if (ret) {
- freq = DEFAULT_DEMOTE_FREQ_SEC;
- }
-
- defrag->tier_demote_frequency = freq;
-
- ret = dict_get_int32 (this->options,
- "write-freq-threshold", &freq);
- if (ret) {
- freq = DEFAULT_WRITE_FREQ_SEC;
- }
-
- defrag->write_freq_threshold = freq;
-
- ret = dict_get_int32 (this->options,
- "read-freq-threshold", &freq);
- if (ret) {
- freq = DEFAULT_READ_FREQ_SEC;
- }
-
- defrag->read_freq_threshold = freq;
-
- ret = gf_asprintf(&promotion_qfile, "%s/%s-%d",
- DEFAULT_VAR_RUN_DIRECTORY,
- PROMOTION_QFILE,
- getpid());
- if (ret < 0)
- goto out;
-
- ret = gf_asprintf(&demotion_qfile, "%s/%s-%d",
- DEFAULT_VAR_RUN_DIRECTORY,
- DEMOTION_QFILE,
- getpid());
- if (ret < 0) {
- GF_FREE(promotion_qfile);
- goto out;
- }
-
- gf_msg(this->name, GF_LOG_INFO, 0,
- DHT_MSG_LOG_TIER_STATUS,
- "Promote/demote frequency %d/%d "
- "Write/Read freq thresholds %d/%d",
- defrag->tier_promote_frequency,
- defrag->tier_demote_frequency,
- defrag->write_freq_threshold,
- defrag->read_freq_threshold);
-
- gf_msg(this->name, GF_LOG_INFO, 0,
- DHT_MSG_LOG_TIER_STATUS,
- "Promote file %s demote file %s",
- promotion_qfile, demotion_qfile);
-
- ret = 0;
-
-out:
-
- return ret;
-}
-
-
-int
-tier_reconfigure (xlator_t *this, dict_t *options)
-{
- dht_conf_t *conf = NULL;
- gf_defrag_info_t *defrag = NULL;
-
- conf = this->private;
-
- if (conf->defrag) {
- defrag = conf->defrag;
- GF_OPTION_RECONF ("tier-promote-frequency",
- defrag->tier_promote_frequency, options,
- int32, out);
-
- GF_OPTION_RECONF ("tier-demote-frequency",
- defrag->tier_demote_frequency, options,
- int32, out);
-
- GF_OPTION_RECONF ("write-freq-threshold",
- defrag->write_freq_threshold, options,
- int32, out);
-
- GF_OPTION_RECONF ("read-freq-threshold",
- defrag->read_freq_threshold, options,
- int32, out);
- }
-
-out:
- return dht_reconfigure (this, options);
-}
-
-void
-tier_fini (xlator_t *this)
-{
- if (libhandle)
- dlclose(libhandle);
-
- GF_FREE(demotion_qfile);
- GF_FREE(promotion_qfile);
-
- dht_fini(this);
-}
-
-class_methods_t class_methods = {
- .init = tier_init,
- .fini = tier_fini,
- .reconfigure = tier_reconfigure,
- .notify = dht_notify
-};
-
-
-struct xlator_fops fops = {
- .lookup = dht_lookup,
- .create = dht_create,
- .mknod = dht_mknod,
-
- .stat = dht_stat,
- .fstat = dht_fstat,
- .truncate = dht_truncate,
- .ftruncate = dht_ftruncate,
- .access = dht_access,
- .readlink = dht_readlink,
- .setxattr = dht_setxattr,
- .getxattr = dht_getxattr,
- .removexattr = dht_removexattr,
- .open = dht_open,
- .readv = dht_readv,
- .writev = dht_writev,
- .flush = dht_flush,
- .fsync = dht_fsync,
- .statfs = dht_statfs,
- .lk = dht_lk,
- .opendir = dht_opendir,
- .readdir = dht_readdir,
- .readdirp = dht_readdirp,
- .fsyncdir = dht_fsyncdir,
- .symlink = dht_symlink,
- .unlink = dht_unlink,
- .link = dht_link,
- .mkdir = dht_mkdir,
- .rmdir = dht_rmdir,
- .rename = dht_rename,
- .inodelk = dht_inodelk,
- .finodelk = dht_finodelk,
- .entrylk = dht_entrylk,
- .fentrylk = dht_fentrylk,
- .xattrop = dht_xattrop,
- .fxattrop = dht_fxattrop,
- .setattr = dht_setattr,
-};
-
-
-struct xlator_cbks cbks = {
- .forget = dht_forget
-};
-
diff --git a/xlators/cluster/dht/src/tier.h b/xlators/cluster/dht/src/tier.h
deleted file mode 100644
index 7d4ea6b7ebe..00000000000
--- a/xlators/cluster/dht/src/tier.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _TIER_H_
-#define _TIER_H_
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-
-/******************************************************************************/
-/* This is from dht-rebalancer.c as we dont have dht-rebalancer.h */
-#include "dht-common.h"
-#include "xlator.h"
-#include <signal.h>
-#include <fnmatch.h>
-#include <signal.h>
-
-#define DEFAULT_PROMOTE_FREQ_SEC 120
-#define DEFAULT_DEMOTE_FREQ_SEC 120
-#define DEFAULT_WRITE_FREQ_SEC 120
-#define DEFAULT_READ_FREQ_SEC 120
-
-/*
- * Size of timer wheel. We would not promote or demote less
- * frequently than this number.
- */
-#define TIMER_SECS 3600
-
-#include "gfdb_data_store.h"
-#include <ctype.h>
-#include <sys/stat.h>
-
-#define PROMOTION_QFILE "promotequeryfile"
-#define DEMOTION_QFILE "demotequeryfile"
-
-#define GET_QFILE_PATH(is_promotion)\
- (is_promotion) ? promotion_qfile : demotion_qfile
-
-typedef struct _query_cbk_args {
- xlator_t *this;
- gf_defrag_info_t *defrag;
- FILE *queryFILE;
- int is_promotion;
-} query_cbk_args_t;
-
-int
-gf_run_tier(xlator_t *this, gf_defrag_info_t *defrag);
-
-typedef struct _gfdb_brick_dict_info {
- gfdb_time_t *time_stamp;
- gf_boolean_t _gfdb_promote;
- query_cbk_args_t *_query_cbk_args;
-} _gfdb_brick_dict_info_t;
-
-typedef struct _dm_thread_args {
- xlator_t *this;
- gf_defrag_info_t *defrag;
- dict_t *brick_list;
- int freq_time;
- int return_value;
-} promotion_args_t, demotion_args_t;
-
-#endif
diff --git a/xlators/cluster/dht/src/unittest/dht_layout_mock.c b/xlators/cluster/dht/src/unittest/dht_layout_mock.c
index faf871d49be..771452963d1 100644
--- a/xlators/cluster/dht/src/unittest/dht_layout_mock.c
+++ b/xlators/cluster/dht/src/unittest/dht_layout_mock.c
@@ -7,71 +7,67 @@
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "xlator.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/xlator.h>
#include "dht-common.h"
-#include "byte-order.h"
+#include <glusterfs/byte-order.h>
int
-dht_hash_compute (xlator_t *this, int type, const char *name, uint32_t *hash_p)
+dht_hash_compute(xlator_t *this, int type, const char *name, uint32_t *hash_p)
{
return 0;
}
int
-dht_inode_ctx_layout_get (inode_t *inode, xlator_t *this, dht_layout_t **layout)
+dht_inode_ctx_layout_get(inode_t *inode, xlator_t *this, dht_layout_t **layout)
{
return 0;
}
int
-dht_inode_ctx_layout_set (inode_t *inode, xlator_t *this,
- dht_layout_t *layout_int)
+dht_inode_ctx_layout_set(inode_t *inode, xlator_t *this,
+ dht_layout_t *layout_int)
{
return 0;
}
int
-dict_get_ptr (dict_t *this, char *key, void **ptr)
+dict_get_ptr(dict_t *this, char *key, void **ptr)
{
return 0;
}
int
-dict_get_ptr_and_len (dict_t *this, char *key, void **ptr, int *len)
+dict_get_ptr_and_len(dict_t *this, char *key, void **ptr, int *len)
{
return 0;
}
-int _gf_log (const char *domain, const char *file,
- const char *function, int32_t line, gf_loglevel_t level,
- const char *fmt, ...)
+int
+_gf_log(const char *domain, const char *file, const char *function,
+ int32_t line, gf_loglevel_t level, const char *fmt, ...)
{
return 0;
}
-int _gf_log_callingfn (const char *domain, const char *file,
- const char *function, int32_t line, gf_loglevel_t level,
- const char *fmt, ...)
+int
+_gf_log_callingfn(const char *domain, const char *file, const char *function,
+ int32_t line, gf_loglevel_t level, const char *fmt, ...)
{
return 0;
}
-void gf_uuid_unparse(const uuid_t uu, char *out)
+void
+gf_uuid_unparse(const uuid_t uu, char *out)
{
// could call a will-return function here
// to place the correct data in *out
}
int
-_gf_msg (const char *domain, const char *file, const char *function,
- int32_t line, gf_loglevel_t level, int errnum, int trace,
- uint64_t msgid, const char *fmt, ...)
+_gf_msg(const char *domain, const char *file, const char *function,
+ int32_t line, gf_loglevel_t level, int errnum, int trace,
+ uint64_t msgid, const char *fmt, ...)
{
return 0;
}
diff --git a/xlators/cluster/dht/src/unittest/dht_layout_unittest.c b/xlators/cluster/dht/src/unittest/dht_layout_unittest.c
index 3702af366f9..c94a1d0a2e1 100644
--- a/xlators/cluster/dht/src/unittest/dht_layout_unittest.c
+++ b/xlators/cluster/dht/src/unittest/dht_layout_unittest.c
@@ -9,8 +9,8 @@
*/
#include "dht-common.h"
-#include "logging.h"
-#include "xlator.h"
+#include <glusterfs/logging.h>
+#include <glusterfs/xlator.h>
#include <inttypes.h>
#include <stdarg.h>
@@ -33,16 +33,17 @@ helper_xlator_init(uint32_t num_types)
xl = test_calloc(1, sizeof(xlator_t));
assert_non_null(xl);
- xl->mem_acct.num_types = num_types;
- xl->mem_acct.rec = test_calloc(num_types, sizeof(struct mem_acct_rec));
- assert_non_null(xl->mem_acct.rec);
+ xl->mem_acct->num_types = num_types;
+ xl->mem_acct = test_calloc(sizeof(struct mem_acct) +
+ sizeof(struct mem_acct_rec) + num_types);
+ assert_non_null(xl->mem_acct);
xl->ctx = test_calloc(1, sizeof(glusterfs_ctx_t));
assert_non_null(xl->ctx);
for (i = 0; i < num_types; i++) {
- ret = LOCK_INIT(&(xl->mem_acct.rec[i].lock));
- assert_false(ret);
+ ret = LOCK_INIT(&(xl->mem_acct.rec[i].lock));
+ assert_false(ret);
}
ENSURE(num_types == xl->mem_acct.num_types);
@@ -57,8 +58,8 @@ helper_xlator_destroy(xlator_t *xl)
int i, ret;
for (i = 0; i < xl->mem_acct.num_types; i++) {
- ret = LOCK_DESTROY(&(xl->mem_acct.rec[i].lock));
- assert_int_equal(ret, 0);
+ ret = LOCK_DESTROY(&(xl->mem_acct.rec[i].lock));
+ assert_int_equal(ret, 0);
}
free(xl->mem_acct.rec);
@@ -75,7 +76,7 @@ test_dht_layout_new(void **state)
{
xlator_t *xl;
dht_layout_t *layout;
- dht_conf_t *conf;
+ dht_conf_t *conf;
int cnt;
expect_assert_failure(dht_layout_new(NULL, 0));
@@ -89,7 +90,7 @@ test_dht_layout_new(void **state)
assert_non_null(layout);
assert_int_equal(layout->type, DHT_HASH_TYPE_DM);
assert_int_equal(layout->cnt, cnt);
- assert_int_equal(layout->ref, 1);
+ assert_int_equal(GF_ATOMIC_GET(layout->ref), 1);
assert_int_equal(layout->gen, 0);
assert_int_equal(layout->spread_cnt, 0);
free(layout);
@@ -106,7 +107,7 @@ test_dht_layout_new(void **state)
assert_non_null(layout);
assert_int_equal(layout->type, DHT_HASH_TYPE_DM);
assert_int_equal(layout->cnt, cnt);
- assert_int_equal(layout->ref, 1);
+ assert_int_equal(GF_ATOMIC_GET(layout->ref), 1);
assert_int_equal(layout->gen, conf->gen);
assert_int_equal(layout->spread_cnt, conf->dir_spread_cnt);
free(layout);
@@ -115,7 +116,9 @@ test_dht_layout_new(void **state)
helper_xlator_destroy(xl);
}
-int main(void) {
+int
+main(void)
+{
const struct CMUnitTest xlator_dht_layout_tests[] = {
unit_test(test_dht_layout_new),
};
diff --git a/xlators/cluster/ec/src/Makefile.am b/xlators/cluster/ec/src/Makefile.am
index 12d87f99e4d..406a636bbc2 100644
--- a/xlators/cluster/ec/src/Makefile.am
+++ b/xlators/cluster/ec/src/Makefile.am
@@ -12,8 +12,11 @@ ec_sources += ec-dir-write.c
ec_sources += ec-inode-read.c
ec_sources += ec-inode-write.c
ec_sources += ec-combine.c
-ec_sources += ec-gf.c
ec_sources += ec-method.c
+ec_sources += ec-galois.c
+ec_sources += ec-code.c
+ec_sources += ec-code-c.c
+ec_sources += ec-gf8.c
ec_sources += ec-heal.c
ec_sources += ec-heald.c
@@ -24,22 +27,50 @@ ec_headers += ec-data.h
ec_headers += ec-fops.h
ec_headers += ec-common.h
ec_headers += ec-combine.h
-ec_headers += ec-gf.h
ec_headers += ec-method.h
+ec_headers += ec-galois.h
+ec_headers += ec-code.h
+ec_headers += ec-code-c.h
+ec_headers += ec-gf8.h
ec_headers += ec-heald.h
+ec_headers += ec-messages.h
+ec_headers += ec-types.h
+
+if ENABLE_EC_DYNAMIC_INTEL
+ ec_sources += ec-code-intel.c
+ ec_headers += ec-code-intel.h
+endif
+
+if ENABLE_EC_DYNAMIC_X64
+ ec_sources += ec-code-x64.c
+ ec_headers += ec-code-x64.h
+endif
+
+if ENABLE_EC_DYNAMIC_SSE
+ ec_sources += ec-code-sse.c
+ ec_headers += ec-code-sse.h
+endif
+
+if ENABLE_EC_DYNAMIC_AVX
+ ec_sources += ec-code-avx.c
+ ec_headers += ec-code-avx.h
+endif
ec_ext_sources = $(top_builddir)/xlators/lib/src/libxlator.c
ec_ext_headers = $(top_builddir)/xlators/lib/src/libxlator.h
-ec_la_LDFLAGS = -module -avoid-version
+ec_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
ec_la_SOURCES = $(ec_sources) $(ec_headers) $(ec_ext_sources) $(ec_ext_headers)
ec_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-AM_CPPFLAGS = $(GF_CPPFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS)
AM_CPPFLAGS += -I$(top_srcdir)/libglusterfs/src
AM_CPPFLAGS += -I$(top_srcdir)/xlators/lib/src
AM_CPPFLAGS += -I$(top_srcdir)/rpc/rpc-lib/src
+AM_CPPFLAGS += -I$(top_srcdir)/rpc/xdr/src
+AM_CPPFLAGS += -I$(top_builddir)/rpc/xdr/src
+AM_CPPFLAGS += -DGLUSTERFS_LIBEXECDIR=\"$(GLUSTERFS_LIBEXECDIR)\"
AM_CFLAGS = -Wall $(GF_CFLAGS)
diff --git a/xlators/cluster/ec/src/ec-code-avx.c b/xlators/cluster/ec/src/ec-code-avx.c
new file mode 100644
index 00000000000..70afaa00f54
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-code-avx.c
@@ -0,0 +1,109 @@
+/*
+ Copyright (c) 2015 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <errno.h>
+
+#include "ec-code-intel.h"
+
+static void
+ec_code_avx_prolog(ec_code_builder_t *builder)
+{
+ builder->loop = builder->address;
+}
+
+static void
+ec_code_avx_epilog(ec_code_builder_t *builder)
+{
+ ec_code_intel_op_add_i2r(builder, 32, REG_DX);
+ ec_code_intel_op_add_i2r(builder, 32, REG_DI);
+ ec_code_intel_op_test_i2r(builder, builder->width - 1, REG_DX);
+ ec_code_intel_op_jne(builder, builder->loop);
+
+ ec_code_intel_op_ret(builder, 0);
+}
+
+static void
+ec_code_avx_load(ec_code_builder_t *builder, uint32_t dst, uint32_t idx,
+ uint32_t bit)
+{
+ if (builder->linear) {
+ ec_code_intel_op_mov_m2avx(
+ builder, REG_SI, REG_DX, 1,
+ idx * builder->width * builder->bits + bit * builder->width, dst);
+ } else {
+ if (builder->base != idx) {
+ ec_code_intel_op_mov_m2r(builder, REG_SI, REG_NULL, 0, idx * 8,
+ REG_AX);
+ builder->base = idx;
+ }
+ ec_code_intel_op_mov_m2avx(builder, REG_AX, REG_DX, 1,
+ bit * builder->width, dst);
+ }
+}
+
+static void
+ec_code_avx_store(ec_code_builder_t *builder, uint32_t src, uint32_t bit)
+{
+ ec_code_intel_op_mov_avx2m(builder, src, REG_DI, REG_NULL, 0,
+ bit * builder->width);
+}
+
+static void
+ec_code_avx_copy(ec_code_builder_t *builder, uint32_t dst, uint32_t src)
+{
+ ec_code_intel_op_mov_avx2avx(builder, src, dst);
+}
+
+static void
+ec_code_avx_xor2(ec_code_builder_t *builder, uint32_t dst, uint32_t src)
+{
+ ec_code_intel_op_xor_avx2avx(builder, src, dst);
+}
+
+static void
+ec_code_avx_xor3(ec_code_builder_t *builder, uint32_t dst, uint32_t src1,
+ uint32_t src2)
+{
+ ec_code_intel_op_mov_avx2avx(builder, src1, dst);
+ ec_code_intel_op_xor_avx2avx(builder, src2, dst);
+}
+
+static void
+ec_code_avx_xorm(ec_code_builder_t *builder, uint32_t dst, uint32_t idx,
+ uint32_t bit)
+{
+ if (builder->linear) {
+ ec_code_intel_op_xor_m2avx(
+ builder, REG_SI, REG_DX, 1,
+ idx * builder->width * builder->bits + bit * builder->width, dst);
+ } else {
+ if (builder->base != idx) {
+ ec_code_intel_op_mov_m2r(builder, REG_SI, REG_NULL, 0, idx * 8,
+ REG_AX);
+ builder->base = idx;
+ }
+ ec_code_intel_op_xor_m2avx(builder, REG_AX, REG_DX, 1,
+ bit * builder->width, dst);
+ }
+}
+
+static char *ec_code_avx_needed_flags[] = {"avx2", NULL};
+
+ec_code_gen_t ec_code_gen_avx = {.name = "avx",
+ .flags = ec_code_avx_needed_flags,
+ .width = 32,
+ .prolog = ec_code_avx_prolog,
+ .epilog = ec_code_avx_epilog,
+ .load = ec_code_avx_load,
+ .store = ec_code_avx_store,
+ .copy = ec_code_avx_copy,
+ .xor2 = ec_code_avx_xor2,
+ .xor3 = ec_code_avx_xor3,
+ .xorm = ec_code_avx_xorm};
diff --git a/xlators/cluster/dht/src/dht-helper.h b/xlators/cluster/ec/src/ec-code-avx.h
index e3ab9c4d93b..fdca4ad2c8f 100644
--- a/xlators/cluster/dht/src/dht-helper.h
+++ b/xlators/cluster/ec/src/ec-code-avx.h
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2008-2014 Red Hat, Inc. <http://www.redhat.com>
+ Copyright (c) 2015 DataLab, s.l. <http://www.datalab.es>
This file is part of GlusterFS.
This file is licensed to you under your choice of the GNU Lesser
@@ -7,13 +7,12 @@
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
-#ifndef _DHT_HELPER_H
-#define _DHT_HELPER_H
-int
-dht_lock_order_requests (dht_lock_t **lk_array, int count);
+#ifndef __EC_CODE_AVX_H__
+#define __EC_CODE_AVX_H__
-void
-dht_blocking_inodelk_rec (call_frame_t *frame, int i);
+#include "ec-code.h"
-#endif
+extern ec_code_gen_t ec_code_gen_avx;
+
+#endif /* __EC_CODE_AVX_H__ */
diff --git a/xlators/cluster/ec/src/ec-code-c.c b/xlators/cluster/ec/src/ec-code-c.c
new file mode 100644
index 00000000000..acdc665c2cf
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-code-c.c
@@ -0,0 +1,11679 @@
+/*
+ Copyright (c) 2015 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <inttypes.h>
+#include <string.h>
+
+#include "ec-method.h"
+#include "ec-code-c.h"
+
+#define WIDTH (EC_METHOD_WORD_SIZE / sizeof(uint64_t))
+
+static void
+gf8_muladd_00(void *out, void *in)
+{
+ memcpy(out, in, EC_METHOD_WORD_SIZE * 8);
+}
+
+static void
+gf8_muladd_01(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ out_ptr[0] ^= in_ptr[0];
+ out_ptr[WIDTH] ^= in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] ^= in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] ^= in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] ^= in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] ^= in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] ^= in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] ^= in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_02(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out0 = in7;
+ out1 = in0;
+ out7 = in6;
+ out5 = in4;
+ out6 = in5;
+ out3 = in2 ^ in7;
+ out4 = in3 ^ in7;
+ out2 = in1 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_03(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out0 = in0 ^ in7;
+ tmp0 = in2 ^ in7;
+ out1 = in0 ^ in1;
+ out7 = in6 ^ in7;
+ out5 = in4 ^ in5;
+ out6 = in5 ^ in6;
+ out4 = in3 ^ in4 ^ in7;
+ out2 = tmp0 ^ in1;
+ out3 = tmp0 ^ in3;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_04(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out0 = in6;
+ out1 = in7;
+ out7 = in5;
+ out6 = in4;
+ tmp0 = in6 ^ in7;
+ out2 = in0 ^ in6;
+ out5 = in3 ^ in7;
+ out3 = tmp0 ^ in1;
+ out4 = tmp0 ^ in2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_05(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out0 = in0 ^ in6;
+ out1 = in1 ^ in7;
+ out7 = in5 ^ in7;
+ out6 = in4 ^ in6;
+ out2 = out0 ^ in2;
+ out3 = out1 ^ in3 ^ in6;
+ out5 = out7 ^ in3;
+ out4 = out6 ^ in2 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_06(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out0 = in6 ^ in7;
+ tmp0 = in1 ^ in6;
+ out1 = in0 ^ in7;
+ out7 = in5 ^ in6;
+ out6 = in4 ^ in5;
+ out4 = in2 ^ in3 ^ in6;
+ out5 = in3 ^ in4 ^ in7;
+ out3 = tmp0 ^ in2;
+ out2 = tmp0 ^ out1;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_07(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in2 ^ in6;
+ tmp1 = in5 ^ in6;
+ tmp2 = in0 ^ in7;
+ tmp3 = tmp0 ^ in3;
+ out6 = tmp1 ^ in4;
+ out7 = tmp1 ^ in7;
+ out0 = tmp2 ^ in6;
+ out1 = tmp2 ^ in1;
+ out3 = tmp3 ^ in1;
+ out4 = tmp3 ^ in4;
+ out5 = out4 ^ out7 ^ in2;
+ out2 = tmp0 ^ out1;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_08(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out0 = in5;
+ out1 = in6;
+ out7 = in4;
+ out6 = in3 ^ in7;
+ out3 = in0 ^ in5 ^ in6;
+ out5 = in2 ^ in6 ^ in7;
+ out2 = in5 ^ in7;
+ out4 = out2 ^ in1 ^ in6;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_09(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out0 = in0 ^ in5;
+ tmp0 = in3 ^ in6;
+ out1 = in1 ^ in6;
+ out7 = in4 ^ in7;
+ out2 = in2 ^ in5 ^ in7;
+ out3 = tmp0 ^ out0;
+ out6 = tmp0 ^ in7;
+ out4 = out1 ^ out7 ^ in5;
+ out5 = out2 ^ in6;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_0A(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out0 = in5 ^ in7;
+ out1 = in0 ^ in6;
+ out7 = in4 ^ in6;
+ out2 = in1 ^ in5;
+ out6 = out0 ^ in3;
+ out3 = out0 ^ out1 ^ in2;
+ out5 = out7 ^ in2 ^ in7;
+ out4 = out2 ^ in3 ^ in6;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_0B(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in2 ^ in5;
+ tmp1 = in0 ^ in6;
+ tmp2 = in4 ^ in7;
+ out0 = in0 ^ in5 ^ in7;
+ out2 = tmp0 ^ in1;
+ out1 = tmp1 ^ in1;
+ out6 = tmp1 ^ out0 ^ in3;
+ out7 = tmp2 ^ in6;
+ out4 = tmp2 ^ out6 ^ in1;
+ out3 = out6 ^ in0 ^ in2;
+ out5 = tmp0 ^ out7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_0C(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out0 = in5 ^ in6;
+ out1 = in6 ^ in7;
+ out7 = in4 ^ in5;
+ tmp0 = in1 ^ in5;
+ tmp1 = in0 ^ in7;
+ out5 = in2 ^ in3 ^ in6;
+ out6 = in3 ^ in4 ^ in7;
+ out2 = tmp1 ^ out0;
+ out4 = tmp0 ^ in2;
+ out3 = tmp0 ^ tmp1;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_0D(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in4 ^ in5;
+ tmp1 = in5 ^ in6;
+ out1 = in1 ^ in6 ^ in7;
+ out7 = tmp0 ^ in7;
+ out4 = tmp0 ^ in1 ^ in2;
+ out0 = tmp1 ^ in0;
+ tmp2 = tmp1 ^ in3;
+ out6 = tmp2 ^ out7;
+ out2 = out0 ^ in2 ^ in7;
+ out3 = out0 ^ out1 ^ in3;
+ out5 = tmp2 ^ in2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_0E(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in1;
+ tmp1 = in2 ^ in5;
+ tmp2 = in5 ^ in6;
+ out1 = in0 ^ in6 ^ in7;
+ out3 = tmp0 ^ tmp1;
+ out2 = tmp0 ^ tmp2;
+ tmp3 = tmp1 ^ in3;
+ out7 = tmp2 ^ in4;
+ out0 = tmp2 ^ in7;
+ out4 = tmp3 ^ in1 ^ in7;
+ out5 = tmp3 ^ out7;
+ out6 = out0 ^ out5 ^ in2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_0F(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in6 ^ in7;
+ tmp1 = tmp0 ^ in1;
+ tmp2 = tmp0 ^ in5;
+ out1 = tmp1 ^ in0;
+ out7 = tmp2 ^ in4;
+ out0 = tmp2 ^ in0;
+ out6 = out7 ^ in3;
+ out5 = out6 ^ in2 ^ in7;
+ tmp3 = tmp1 ^ out0 ^ in2;
+ out4 = tmp1 ^ out5;
+ out2 = tmp3 ^ in6;
+ out3 = tmp3 ^ in3;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_10(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out0 = in4;
+ out1 = in5;
+ out7 = in3 ^ in7;
+ tmp0 = in6 ^ in7;
+ out2 = in4 ^ in6;
+ tmp1 = out2 ^ in5;
+ out6 = tmp0 ^ in2;
+ out3 = tmp0 ^ tmp1;
+ out5 = out2 ^ out3 ^ in1;
+ out4 = tmp1 ^ in0;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_11(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out7 = in3;
+ out0 = in0 ^ in4;
+ out1 = in1 ^ in5;
+ out6 = in2 ^ in7;
+ out4 = in0 ^ in5 ^ in6;
+ out5 = in1 ^ in6 ^ in7;
+ out2 = in2 ^ in4 ^ in6;
+ out3 = in3 ^ in4 ^ in5 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_12(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out0 = in4 ^ in7;
+ out1 = in0 ^ in5;
+ out3 = in2 ^ in4 ^ in5;
+ tmp0 = out0 ^ in6;
+ out2 = tmp0 ^ in1;
+ tmp1 = tmp0 ^ in3;
+ out6 = tmp0 ^ out3;
+ out5 = out2 ^ in5;
+ out7 = tmp1 ^ in4;
+ out4 = tmp1 ^ out1;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_13(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out7 = in3 ^ in6;
+ tmp0 = in0 ^ in5;
+ tmp1 = in4 ^ in7;
+ out6 = in2 ^ in5 ^ in7;
+ out4 = tmp0 ^ out7 ^ in7;
+ out1 = tmp0 ^ in1;
+ out0 = tmp1 ^ in0;
+ out5 = tmp1 ^ in1 ^ in6;
+ out3 = tmp1 ^ out6 ^ in3;
+ out2 = out5 ^ in2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_14(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out0 = in4 ^ in6;
+ out1 = in5 ^ in7;
+ out2 = in0 ^ in4;
+ tmp0 = out0 ^ in5;
+ out7 = out1 ^ in3;
+ tmp1 = out1 ^ in2;
+ out3 = tmp0 ^ in1;
+ out6 = tmp0 ^ tmp1;
+ out4 = tmp1 ^ out2;
+ out5 = out3 ^ in3 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_15(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out7 = in3 ^ in5;
+ tmp0 = in0 ^ in4;
+ out1 = in1 ^ in5 ^ in7;
+ out5 = in1 ^ in3 ^ in6;
+ out0 = tmp0 ^ in6;
+ out2 = tmp0 ^ in2;
+ out3 = out5 ^ in4 ^ in5;
+ out6 = out2 ^ in0 ^ in7;
+ out4 = tmp0 ^ out6 ^ in5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_16(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in5;
+ tmp1 = in4 ^ in7;
+ tmp2 = in2 ^ in3 ^ in4;
+ out1 = tmp0 ^ in7;
+ out4 = tmp0 ^ tmp2;
+ out0 = tmp1 ^ in6;
+ tmp3 = tmp1 ^ in1;
+ out6 = out0 ^ in2 ^ in5;
+ out2 = tmp3 ^ in0;
+ out3 = out6 ^ in1;
+ out7 = tmp2 ^ out6;
+ out5 = tmp3 ^ out7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_17(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in2 ^ in5;
+ tmp1 = in3 ^ in6;
+ tmp2 = tmp0 ^ in4;
+ out4 = tmp0 ^ in0 ^ in3;
+ out7 = tmp1 ^ in5;
+ tmp3 = tmp1 ^ in1;
+ out6 = tmp2 ^ in7;
+ out5 = tmp3 ^ in4;
+ out3 = tmp3 ^ out6;
+ out0 = out3 ^ out4 ^ in1;
+ out2 = out3 ^ out7 ^ in0;
+ out1 = tmp2 ^ out2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_18(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out0 = in4 ^ in5;
+ out1 = in5 ^ in6;
+ tmp0 = in4 ^ in7;
+ out5 = in1 ^ in2 ^ in5;
+ out6 = in2 ^ in3 ^ in6;
+ out2 = tmp0 ^ out1;
+ out7 = tmp0 ^ in3;
+ tmp1 = tmp0 ^ in0;
+ out3 = tmp1 ^ in6;
+ out4 = tmp1 ^ in1;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_19(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out5 = in1 ^ in2;
+ out7 = in3 ^ in4;
+ tmp0 = in0 ^ in7;
+ out6 = in2 ^ in3;
+ out1 = in1 ^ in5 ^ in6;
+ out0 = in0 ^ in4 ^ in5;
+ out4 = tmp0 ^ in1;
+ tmp1 = tmp0 ^ in6;
+ out2 = tmp1 ^ out0 ^ in2;
+ out3 = tmp1 ^ out7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_1A(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in4 ^ in5;
+ tmp1 = in5 ^ in6;
+ tmp2 = tmp0 ^ in1;
+ out0 = tmp0 ^ in7;
+ out1 = tmp1 ^ in0;
+ tmp3 = tmp1 ^ in3;
+ out5 = tmp2 ^ in2;
+ out2 = tmp2 ^ in6;
+ out7 = tmp3 ^ out0;
+ out6 = tmp3 ^ in2;
+ out4 = tmp3 ^ out2 ^ in0;
+ out3 = tmp0 ^ out1 ^ in2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_1B(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in2 ^ in4;
+ tmp1 = in2 ^ in5;
+ tmp2 = in3 ^ in6;
+ out5 = tmp0 ^ in1;
+ tmp3 = tmp0 ^ in0;
+ out6 = tmp1 ^ in3;
+ out0 = tmp1 ^ tmp3 ^ in7;
+ out7 = tmp2 ^ in4;
+ tmp4 = out5 ^ in6;
+ out3 = tmp2 ^ tmp3;
+ out2 = tmp4 ^ in5;
+ out4 = tmp4 ^ out3;
+ out1 = tmp3 ^ out2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_1C(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in2 ^ in3;
+ tmp1 = in4 ^ in6;
+ tmp2 = in5 ^ in7;
+ out6 = tmp0 ^ tmp1;
+ out0 = tmp1 ^ in5;
+ out1 = tmp2 ^ in6;
+ tmp3 = tmp2 ^ in1;
+ tmp4 = tmp2 ^ in4;
+ out2 = tmp4 ^ in0;
+ out7 = tmp4 ^ in3;
+ out5 = tmp0 ^ tmp3;
+ out3 = tmp3 ^ out2;
+ out4 = out3 ^ in2 ^ in6;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_1D(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in1 ^ in3;
+ tmp1 = in0 ^ in4;
+ tmp2 = in3 ^ in4;
+ tmp3 = in2 ^ in7;
+ out3 = tmp0 ^ tmp1;
+ out5 = tmp0 ^ tmp3;
+ tmp4 = tmp1 ^ in5;
+ out6 = tmp2 ^ in2;
+ out7 = tmp2 ^ in5;
+ out2 = tmp3 ^ tmp4;
+ out4 = out3 ^ out6 ^ in6;
+ out0 = tmp4 ^ in6;
+ out1 = out2 ^ out4 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_1E(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in4;
+ tmp1 = in2 ^ in7;
+ tmp2 = tmp0 ^ in1;
+ out3 = tmp1 ^ tmp2;
+ out2 = tmp2 ^ in5;
+ out4 = out3 ^ in3 ^ in6;
+ tmp3 = out4 ^ in7;
+ out6 = tmp3 ^ out2 ^ in4;
+ out7 = tmp1 ^ out6;
+ out0 = out7 ^ in3;
+ out1 = tmp0 ^ out0;
+ out5 = tmp3 ^ out1;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_1F(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in4 ^ in6;
+ tmp1 = tmp0 ^ in5;
+ out7 = tmp1 ^ in3;
+ out0 = tmp1 ^ in0 ^ in7;
+ out6 = out7 ^ in2 ^ in6;
+ out1 = out0 ^ in1 ^ in4;
+ out4 = out0 ^ out6 ^ in1;
+ out3 = tmp0 ^ out4;
+ out2 = out4 ^ out7 ^ in7;
+ out5 = out3 ^ in0;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_20(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out1 = in4;
+ out0 = in3 ^ in7;
+ tmp0 = in3 ^ in4;
+ tmp1 = in6 ^ in7;
+ out2 = out0 ^ in5;
+ out4 = tmp0 ^ in5;
+ out3 = tmp0 ^ tmp1;
+ out7 = tmp1 ^ in2;
+ out6 = tmp1 ^ in1 ^ in5;
+ out5 = out2 ^ out3 ^ in0;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_21(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out1 = in1 ^ in4;
+ tmp0 = in4 ^ in6;
+ out4 = in3 ^ in5;
+ out7 = in2 ^ in6;
+ out0 = in0 ^ in3 ^ in7;
+ out6 = in1 ^ in5 ^ in7;
+ out3 = tmp0 ^ in7;
+ out5 = tmp0 ^ in0;
+ out2 = out4 ^ in2 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_22(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out0 = in3;
+ out1 = in0 ^ in4;
+ out7 = in2 ^ in7;
+ out4 = in4 ^ in5 ^ in7;
+ out5 = in0 ^ in5 ^ in6;
+ out6 = in1 ^ in6 ^ in7;
+ out3 = in2 ^ in3 ^ in4 ^ in6;
+ out2 = in1 ^ in3 ^ in5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_23(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out7 = in2;
+ out0 = in0 ^ in3;
+ out4 = in5 ^ in7;
+ out5 = in0 ^ in6;
+ out6 = in1 ^ in7;
+ out3 = in2 ^ in4 ^ in6;
+ out1 = in0 ^ in1 ^ in4;
+ out2 = out4 ^ out6 ^ in2 ^ in3;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_24(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out1 = in4 ^ in7;
+ tmp0 = in3 ^ in4;
+ out0 = in3 ^ in6 ^ in7;
+ out3 = tmp0 ^ in1;
+ tmp1 = out0 ^ in5;
+ out6 = tmp1 ^ out3;
+ out2 = tmp1 ^ in0;
+ out7 = tmp1 ^ in2 ^ in3;
+ out5 = out2 ^ in4;
+ out4 = tmp0 ^ out7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_25(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out3 = in1 ^ in4;
+ tmp0 = in2 ^ in5;
+ out1 = out3 ^ in7;
+ out7 = tmp0 ^ in6;
+ out6 = out1 ^ in5;
+ out4 = out7 ^ in3 ^ in7;
+ out2 = out4 ^ in0;
+ out0 = tmp0 ^ out2;
+ out5 = out0 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_26(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out0 = in3 ^ in6;
+ tmp0 = in4 ^ in7;
+ out7 = in2 ^ in5 ^ in7;
+ tmp1 = out0 ^ in0 ^ in5;
+ out1 = tmp0 ^ in0;
+ tmp2 = tmp0 ^ in6;
+ out2 = tmp1 ^ in1;
+ out5 = tmp1 ^ in7;
+ out6 = tmp2 ^ in1;
+ out4 = tmp2 ^ out7;
+ out3 = out0 ^ out6 ^ in2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_27(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out7 = in2 ^ in5;
+ out0 = in0 ^ in3 ^ in6;
+ out6 = in1 ^ in4 ^ in7;
+ out4 = out7 ^ in6;
+ out2 = out0 ^ out7 ^ in1;
+ out5 = out0 ^ in7;
+ out1 = out6 ^ in0;
+ out3 = out6 ^ in2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_28(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in3;
+ out1 = in4 ^ in6;
+ out0 = in3 ^ in5 ^ in7;
+ tmp0 = out1 ^ in7;
+ tmp1 = out0 ^ in4;
+ out7 = tmp0 ^ in2;
+ tmp2 = tmp0 ^ in1;
+ out3 = tmp1 ^ in0;
+ out6 = tmp1 ^ tmp2;
+ out4 = tmp2 ^ in3;
+ out5 = out3 ^ in2 ^ in3;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_29(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in2 ^ in3;
+ tmp0 = in1 ^ in3;
+ tmp1 = in4 ^ in6;
+ tmp2 = in0 ^ in4 ^ in7;
+ out6 = tmp0 ^ in5;
+ out4 = tmp0 ^ in6 ^ in7;
+ out1 = tmp1 ^ in1;
+ out7 = tmp1 ^ in2;
+ out3 = tmp2 ^ in5;
+ out5 = tmp2 ^ in2;
+ out0 = out3 ^ in3 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_2A(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out0 = in3 ^ in5;
+ tmp0 = in1 ^ in3;
+ tmp1 = in0 ^ in4;
+ out7 = in2 ^ in4 ^ in7;
+ out3 = tmp1 ^ out0 ^ in2;
+ out2 = tmp0 ^ in7;
+ out6 = tmp0 ^ in6;
+ out1 = tmp1 ^ in6;
+ out5 = tmp1 ^ out7 ^ in5;
+ out4 = out1 ^ in0 ^ in1;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_2B(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out4 = in1 ^ in6;
+ out7 = in2 ^ in4;
+ tmp0 = in0 ^ in5;
+ tmp1 = in2 ^ in7;
+ out6 = in1 ^ in3;
+ out1 = out4 ^ in0 ^ in4;
+ out3 = tmp0 ^ out7;
+ out0 = tmp0 ^ in3;
+ out5 = tmp1 ^ in0;
+ out2 = tmp1 ^ out6;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_2C(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in2 ^ in5;
+ tmp1 = in2 ^ in3 ^ in4;
+ tmp2 = tmp0 ^ in6;
+ out4 = tmp1 ^ in1;
+ out5 = tmp1 ^ in0 ^ in5;
+ tmp3 = tmp2 ^ in4;
+ out6 = tmp2 ^ out4;
+ out7 = tmp3 ^ in7;
+ out2 = tmp3 ^ out5;
+ out3 = out6 ^ in0;
+ out0 = tmp1 ^ out7;
+ out1 = tmp0 ^ out7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_2D(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in2 ^ in3;
+ out4 = tmp0 ^ in1;
+ tmp1 = tmp0 ^ in0;
+ out2 = tmp1 ^ in6;
+ out5 = tmp1 ^ in4;
+ tmp2 = out2 ^ in2;
+ tmp3 = tmp2 ^ in5;
+ out0 = tmp3 ^ in7;
+ out7 = tmp3 ^ out5;
+ out6 = out4 ^ out7 ^ in6;
+ out3 = tmp2 ^ out6;
+ out1 = out0 ^ out6 ^ in0;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_2E(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in4 ^ in7;
+ out0 = in3 ^ in5 ^ in6;
+ tmp1 = tmp0 ^ in0;
+ tmp2 = tmp0 ^ in2;
+ out1 = tmp1 ^ in6;
+ out4 = tmp2 ^ in1;
+ out7 = tmp2 ^ in5;
+ out3 = out0 ^ out4 ^ in0;
+ out2 = out3 ^ out7 ^ in7;
+ out6 = tmp1 ^ out2;
+ out5 = tmp1 ^ out7 ^ in3;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_2F(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in3;
+ tmp1 = in2 ^ in5;
+ out4 = in1 ^ in2 ^ in7;
+ out6 = in1 ^ in3 ^ in4;
+ out5 = tmp0 ^ in2;
+ tmp2 = tmp0 ^ in6;
+ out7 = tmp1 ^ in4;
+ out0 = tmp2 ^ in5;
+ out2 = tmp2 ^ out4;
+ out1 = tmp2 ^ out6 ^ in7;
+ out3 = tmp1 ^ out1;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_30(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out1 = in4 ^ in5;
+ tmp0 = in3 ^ in6;
+ tmp1 = in4 ^ in7;
+ out6 = in1 ^ in2 ^ in5;
+ out3 = tmp0 ^ in5;
+ out4 = tmp0 ^ in0;
+ out7 = tmp0 ^ in2;
+ out0 = tmp1 ^ in3;
+ out2 = tmp1 ^ out3;
+ out5 = tmp1 ^ in0 ^ in1;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_31(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out3 = in5 ^ in6;
+ tmp0 = in4 ^ in5;
+ tmp1 = in0 ^ in3 ^ in4;
+ tmp2 = out3 ^ in2;
+ out1 = tmp0 ^ in1;
+ out0 = tmp1 ^ in7;
+ out4 = tmp1 ^ in6;
+ out6 = tmp2 ^ in1;
+ out2 = tmp2 ^ out0 ^ in0;
+ out5 = out1 ^ in0 ^ in7;
+ out7 = tmp0 ^ out2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_32(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out0 = in3 ^ in4;
+ out7 = in2 ^ in3;
+ tmp0 = in5 ^ in6;
+ tmp1 = in0 ^ in7;
+ out6 = in1 ^ in2;
+ out1 = in0 ^ in4 ^ in5;
+ out2 = tmp0 ^ out0 ^ in1;
+ out3 = tmp0 ^ out7 ^ in7;
+ out4 = tmp1 ^ in6;
+ out5 = tmp1 ^ in1;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_33(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in2 ^ in3;
+ tmp1 = in0 ^ in4;
+ tmp2 = in1 ^ in5;
+ out6 = in1 ^ in2 ^ in6;
+ out7 = tmp0 ^ in7;
+ out0 = tmp1 ^ in3;
+ out1 = tmp1 ^ tmp2;
+ tmp3 = tmp2 ^ in7;
+ tmp4 = tmp2 ^ in4 ^ in6;
+ out5 = tmp3 ^ in0;
+ out3 = tmp3 ^ out6;
+ out4 = tmp4 ^ out5;
+ out2 = tmp0 ^ tmp4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_34(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in3 ^ in4;
+ tmp1 = in4 ^ in5;
+ tmp2 = tmp0 ^ in1;
+ tmp3 = tmp0 ^ in6;
+ out1 = tmp1 ^ in7;
+ tmp4 = tmp1 ^ in2;
+ out5 = tmp2 ^ in0;
+ out3 = tmp2 ^ out1;
+ out0 = tmp3 ^ in7;
+ out7 = tmp3 ^ tmp4;
+ out6 = tmp4 ^ in1;
+ out2 = out3 ^ out5 ^ in3;
+ out4 = tmp4 ^ out2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_35(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in2 ^ in6;
+ tmp1 = in5 ^ in7;
+ out7 = tmp0 ^ tmp1 ^ in3;
+ out3 = tmp1 ^ in1;
+ out1 = out3 ^ in4;
+ tmp2 = out1 ^ in7;
+ out5 = tmp2 ^ in0 ^ in3;
+ out6 = tmp0 ^ tmp2;
+ out0 = out3 ^ out5 ^ in6;
+ out4 = tmp0 ^ out0;
+ out2 = out4 ^ in5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_36(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out4 = in0 ^ in2;
+ tmp0 = in1 ^ in3;
+ out0 = in3 ^ in4 ^ in6;
+ out6 = in1 ^ in2 ^ in4;
+ out5 = tmp0 ^ in0;
+ tmp1 = out5 ^ in5;
+ out2 = tmp1 ^ in4;
+ out3 = tmp1 ^ out4;
+ out1 = tmp0 ^ out2 ^ in7;
+ out7 = out3 ^ in1;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_37(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in1 ^ in2;
+ tmp1 = in2 ^ in4;
+ tmp2 = tmp0 ^ in6;
+ out3 = tmp0 ^ in5;
+ out4 = tmp1 ^ in0;
+ out6 = tmp2 ^ in4;
+ out1 = out3 ^ out4 ^ in7;
+ tmp3 = out4 ^ in1 ^ in3;
+ out7 = tmp3 ^ out1;
+ out2 = tmp3 ^ in5;
+ out5 = tmp1 ^ out2;
+ out0 = tmp2 ^ tmp3;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_38(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out3 = in0 ^ in3;
+ tmp0 = in3 ^ in4;
+ tmp1 = in5 ^ in7;
+ tmp2 = out3 ^ in1;
+ out2 = tmp0 ^ in6;
+ out0 = tmp0 ^ tmp1;
+ out4 = tmp1 ^ tmp2;
+ out7 = out2 ^ in2;
+ out1 = out2 ^ in3 ^ in5;
+ out6 = out4 ^ in0 ^ in2;
+ out5 = tmp2 ^ out7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_39(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out3 = in0;
+ tmp0 = in1 ^ in5;
+ tmp1 = tmp0 ^ in4;
+ out1 = tmp1 ^ in6;
+ out5 = out1 ^ in0 ^ in2;
+ tmp2 = tmp0 ^ out5;
+ out2 = tmp2 ^ in0 ^ in3;
+ out7 = out2 ^ in7;
+ out6 = tmp1 ^ out7;
+ out4 = tmp2 ^ out6;
+ out0 = out4 ^ in1;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_3A(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in1;
+ tmp1 = in0 ^ in2;
+ tmp2 = in3 ^ in4;
+ tmp3 = in1 ^ in6;
+ tmp4 = in3 ^ in7;
+ out4 = tmp0 ^ in5;
+ out5 = tmp1 ^ tmp3;
+ out3 = tmp1 ^ tmp4;
+ out0 = tmp2 ^ in5;
+ out7 = tmp2 ^ in2;
+ tmp5 = tmp3 ^ in4;
+ out2 = tmp4 ^ tmp5;
+ out1 = tmp5 ^ out4;
+ out6 = tmp0 ^ out3;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_3B(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in1 ^ in6;
+ tmp1 = in2 ^ in7;
+ tmp2 = tmp0 ^ in3;
+ out3 = tmp1 ^ in0;
+ out6 = tmp1 ^ tmp2;
+ out2 = out6 ^ in4;
+ out7 = tmp0 ^ out2;
+ out0 = out3 ^ out7 ^ in5;
+ out5 = out0 ^ out2 ^ in7;
+ out1 = tmp2 ^ out0;
+ out4 = out1 ^ in6;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_3C(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in3;
+ tmp1 = in2 ^ in7;
+ tmp2 = in1 ^ in6 ^ in7;
+ out2 = tmp0 ^ in4;
+ out3 = tmp0 ^ tmp2;
+ out4 = tmp1 ^ out3 ^ in5;
+ out5 = tmp2 ^ out2 ^ in2;
+ out1 = out4 ^ out5 ^ in6;
+ out0 = out1 ^ in3;
+ out7 = tmp1 ^ out0;
+ out6 = tmp2 ^ out7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_3D(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in2;
+ tmp1 = tmp0 ^ in3;
+ out2 = tmp1 ^ in4;
+ tmp2 = out2 ^ in5;
+ out4 = tmp2 ^ in1 ^ in6;
+ out5 = out4 ^ in7;
+ out6 = out5 ^ in0;
+ out7 = out6 ^ in1;
+ out0 = tmp0 ^ out7;
+ out1 = tmp1 ^ out5;
+ out3 = tmp2 ^ out6;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_3E(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in3 ^ in5;
+ tmp1 = tmp0 ^ in4;
+ out0 = tmp1 ^ in6;
+ out7 = tmp1 ^ in2;
+ out6 = out7 ^ in1 ^ in5 ^ in7;
+ out2 = out6 ^ in0 ^ in2;
+ out4 = out0 ^ out6 ^ in0;
+ out5 = tmp0 ^ out4;
+ out3 = out5 ^ in7;
+ out1 = out3 ^ out6 ^ in5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_3F(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in1;
+ out3 = tmp0 ^ in2 ^ in6;
+ tmp1 = out3 ^ in5 ^ in7;
+ out4 = tmp1 ^ in4;
+ out5 = tmp1 ^ in3;
+ out1 = out4 ^ in2;
+ out7 = out1 ^ out3 ^ in3;
+ out2 = tmp0 ^ out7 ^ in5;
+ tmp2 = out2 ^ in0;
+ out6 = tmp2 ^ in6;
+ out0 = tmp1 ^ tmp2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_40(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out1 = in3 ^ in7;
+ tmp0 = in3 ^ in4;
+ tmp1 = in6 ^ in7;
+ out4 = tmp0 ^ in2;
+ out5 = tmp0 ^ in5;
+ out0 = tmp1 ^ in2;
+ out7 = tmp1 ^ in1 ^ in5;
+ out2 = out0 ^ in4;
+ out3 = out2 ^ out5 ^ in7;
+ out6 = out3 ^ out4 ^ in0;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_41(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out4 = in2 ^ in3;
+ tmp0 = in5 ^ in6;
+ tmp1 = in6 ^ in7;
+ out5 = in3 ^ in4;
+ out1 = in1 ^ in3 ^ in7;
+ out6 = in0 ^ in4 ^ in5;
+ out3 = tmp0 ^ in2;
+ out7 = tmp0 ^ in1;
+ out2 = tmp1 ^ in4;
+ out0 = tmp1 ^ in0 ^ in2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_42(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out0 = in2 ^ in6;
+ out5 = in3 ^ in5;
+ out1 = in0 ^ in3 ^ in7;
+ out7 = in1 ^ in5 ^ in7;
+ out4 = in2 ^ in4 ^ in7;
+ out6 = in0 ^ in4 ^ in6;
+ out2 = out0 ^ in1 ^ in4;
+ out3 = out5 ^ in6 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_43(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out5 = in3;
+ out7 = in1 ^ in5;
+ out4 = in2 ^ in7;
+ out6 = in0 ^ in4;
+ out0 = in0 ^ in2 ^ in6;
+ out3 = in5 ^ in6 ^ in7;
+ out2 = in1 ^ in4 ^ in6;
+ out1 = in0 ^ in1 ^ in3 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_44(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out1 = in3;
+ out0 = in2 ^ in7;
+ tmp0 = in4 ^ in7;
+ out7 = in1 ^ in6 ^ in7;
+ out6 = in0 ^ in5 ^ in6;
+ out4 = tmp0 ^ in3 ^ in6;
+ out3 = out0 ^ in1 ^ in3 ^ in5;
+ out2 = out0 ^ in0 ^ in4;
+ out5 = tmp0 ^ in5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_45(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out1 = in1 ^ in3;
+ out7 = in1 ^ in6;
+ out5 = in4 ^ in7;
+ out6 = in0 ^ in5;
+ out0 = in0 ^ in2 ^ in7;
+ out4 = in3 ^ in6 ^ in7;
+ out2 = out5 ^ in0;
+ out3 = out0 ^ out6 ^ in1;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_46(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out0 = in2;
+ out1 = in0 ^ in3;
+ out7 = in1 ^ in7;
+ out4 = in4 ^ in6;
+ out5 = in5 ^ in7;
+ out6 = in0 ^ in6;
+ out3 = in1 ^ in3 ^ in5;
+ out2 = out4 ^ out6 ^ in1 ^ in2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_47(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out4 = in6;
+ out7 = in1;
+ out5 = in7;
+ out6 = in0;
+ tmp0 = in0 ^ in1;
+ out3 = in1 ^ in5;
+ out0 = in0 ^ in2;
+ out1 = tmp0 ^ in3;
+ out2 = tmp0 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_48(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in2 ^ in3;
+ out1 = in3 ^ in6 ^ in7;
+ out3 = tmp0 ^ in0;
+ out0 = tmp0 ^ out1 ^ in5;
+ tmp1 = out0 ^ in4;
+ out2 = tmp1 ^ in7;
+ out5 = tmp1 ^ in3;
+ out4 = out5 ^ in1;
+ out7 = tmp0 ^ out4;
+ out6 = tmp1 ^ out3;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_49(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out3 = in0 ^ in2;
+ tmp0 = in2 ^ in5;
+ out2 = in4 ^ in5 ^ in6;
+ tmp1 = tmp0 ^ out2 ^ in3;
+ out7 = out2 ^ in1;
+ out5 = tmp1 ^ in7;
+ out4 = out5 ^ out7 ^ in6;
+ out1 = tmp0 ^ out4;
+ out6 = out1 ^ out7 ^ in0;
+ out0 = tmp1 ^ out6;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_4A(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in2 ^ in6;
+ tmp1 = in3 ^ in7;
+ out0 = tmp0 ^ in5;
+ out3 = tmp1 ^ in0;
+ out5 = tmp1 ^ out0;
+ out4 = out0 ^ in1 ^ in4;
+ out1 = out3 ^ in6;
+ out2 = out4 ^ in7;
+ out6 = out1 ^ in4;
+ out7 = tmp0 ^ out2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_4B(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out3 = in0 ^ in7;
+ tmp0 = in1 ^ in5;
+ tmp1 = in2 ^ in6;
+ tmp2 = out3 ^ in3;
+ out7 = tmp0 ^ in4;
+ out4 = tmp0 ^ tmp1;
+ tmp3 = tmp1 ^ in0;
+ out6 = tmp2 ^ in4;
+ out5 = tmp2 ^ tmp3;
+ out1 = tmp2 ^ in1 ^ in6;
+ out2 = out7 ^ in6 ^ in7;
+ out0 = tmp3 ^ in5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_4C(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out1 = in3 ^ in6;
+ tmp0 = in2 ^ in5;
+ tmp1 = out1 ^ in5 ^ in7;
+ out0 = tmp0 ^ in7;
+ tmp2 = tmp0 ^ in4;
+ out6 = tmp1 ^ in0;
+ out2 = tmp2 ^ in0;
+ out5 = tmp2 ^ in6;
+ out3 = tmp0 ^ out6 ^ in1;
+ out7 = out0 ^ out5 ^ in1;
+ out4 = tmp1 ^ out7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_4D(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in5;
+ tmp1 = in1 ^ in6;
+ out4 = in1 ^ in3 ^ in5;
+ tmp2 = tmp0 ^ in7;
+ out2 = tmp0 ^ in4;
+ out1 = tmp1 ^ in3;
+ out7 = tmp1 ^ in4;
+ out0 = tmp2 ^ in2;
+ out6 = tmp2 ^ in3;
+ out5 = out7 ^ in1 ^ in2;
+ out3 = tmp1 ^ out0 ^ in5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_4E(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out0 = in2 ^ in5;
+ out7 = in1 ^ in4 ^ in7;
+ out1 = in0 ^ in3 ^ in6;
+ out5 = out0 ^ in6;
+ out4 = out7 ^ in5;
+ out3 = out1 ^ in1;
+ out6 = out1 ^ in7;
+ out2 = out4 ^ in0 ^ in2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_4F(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out5 = in2 ^ in6;
+ out7 = in1 ^ in4;
+ out3 = in0 ^ in1 ^ in6;
+ out4 = in1 ^ in5 ^ in7;
+ out0 = in0 ^ in2 ^ in5;
+ out6 = in0 ^ in3 ^ in7;
+ out1 = out3 ^ in3;
+ out2 = out4 ^ in0 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_50(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in2 ^ in7;
+ tmp0 = in3 ^ in5;
+ out0 = out2 ^ in4 ^ in6;
+ out1 = tmp0 ^ in7;
+ tmp1 = tmp0 ^ in6;
+ out3 = out0 ^ in3;
+ out7 = tmp1 ^ in1;
+ tmp2 = tmp1 ^ in0;
+ out5 = out3 ^ in1 ^ in2;
+ out4 = tmp2 ^ in2;
+ out6 = tmp2 ^ out3;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_51(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in7;
+ out3 = in2 ^ in4 ^ in6 ^ in7;
+ out0 = out3 ^ in0;
+ out6 = out0 ^ in5;
+ out4 = out6 ^ in3 ^ in7;
+ out1 = out0 ^ out4 ^ in1;
+ out7 = out1 ^ in6;
+ out5 = out7 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_52(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in1 ^ in2;
+ tmp0 = in2 ^ in4;
+ tmp1 = in3 ^ in5;
+ tmp2 = in3 ^ in6;
+ tmp3 = in0 ^ in7;
+ out0 = tmp0 ^ in6;
+ out6 = tmp0 ^ tmp3;
+ out7 = tmp1 ^ in1;
+ out1 = tmp1 ^ tmp3;
+ out3 = tmp2 ^ in4;
+ out5 = tmp2 ^ in1 ^ in7;
+ out4 = tmp2 ^ out1 ^ in2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_53(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in1;
+ out3 = in4 ^ in6;
+ out0 = out3 ^ in0 ^ in2;
+ out6 = out0 ^ in7;
+ out4 = out6 ^ in5;
+ out7 = out0 ^ out4 ^ in1 ^ in3;
+ out1 = out7 ^ in0;
+ out5 = out7 ^ in6;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_54(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out1 = in3 ^ in5;
+ tmp0 = in1 ^ in3;
+ tmp1 = in2 ^ in4;
+ tmp2 = in0 ^ in7;
+ out5 = in1 ^ in4 ^ in6;
+ out4 = tmp2 ^ out1;
+ out7 = tmp0 ^ in6;
+ out3 = tmp0 ^ tmp1;
+ out0 = tmp1 ^ in7;
+ tmp3 = tmp2 ^ in2;
+ out2 = tmp3 ^ in6;
+ out6 = tmp3 ^ in5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_55(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in1 ^ in3;
+ tmp1 = in1 ^ in4;
+ tmp2 = in6 ^ in7;
+ out7 = tmp0 ^ tmp2;
+ out1 = tmp0 ^ in5;
+ out3 = tmp1 ^ in2;
+ out5 = tmp1 ^ in5 ^ in6;
+ out2 = tmp2 ^ in0;
+ out4 = out5 ^ out7 ^ in0;
+ out6 = out2 ^ in2 ^ in5;
+ out0 = out5 ^ out6 ^ in1;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_56(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out0 = in2 ^ in4;
+ tmp0 = in0 ^ in2;
+ out4 = in0 ^ in5;
+ out7 = in1 ^ in3;
+ out5 = in1 ^ in6;
+ out6 = tmp0 ^ in7;
+ out2 = tmp0 ^ out5;
+ out1 = out4 ^ in3;
+ out3 = out7 ^ in4 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_57(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in5;
+ tmp1 = in1 ^ in7;
+ out0 = in0 ^ in2 ^ in4;
+ out5 = in1 ^ in5 ^ in6;
+ out4 = tmp0 ^ in4;
+ out1 = tmp0 ^ in1 ^ in3;
+ out2 = tmp0 ^ out5;
+ out3 = tmp1 ^ in4;
+ out7 = tmp1 ^ in3;
+ out6 = tmp1 ^ out2 ^ in2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_58(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in2 ^ in5;
+ tmp0 = in2 ^ in3 ^ in4;
+ out5 = tmp0 ^ in1;
+ out6 = tmp0 ^ in0 ^ in5;
+ out3 = out6 ^ in7;
+ tmp1 = out2 ^ out5;
+ out7 = tmp1 ^ in6;
+ out4 = tmp1 ^ out3 ^ in3;
+ out0 = out4 ^ out7 ^ in0;
+ out1 = tmp0 ^ out0;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_59(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in5;
+ tmp0 = in0 ^ in5 ^ in7;
+ out3 = tmp0 ^ in2 ^ in4;
+ out0 = out3 ^ in6;
+ tmp1 = out0 ^ in7;
+ out6 = tmp1 ^ in3;
+ out5 = out6 ^ in0 ^ in1 ^ in6;
+ out4 = tmp0 ^ out5;
+ out1 = tmp1 ^ out4;
+ out7 = out1 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_5A(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in1 ^ in2;
+ tmp1 = in2 ^ in5;
+ out5 = tmp0 ^ in3;
+ out4 = tmp0 ^ in0;
+ tmp2 = tmp1 ^ in4;
+ out2 = tmp1 ^ in1 ^ in7;
+ out7 = tmp2 ^ out5;
+ out6 = out4 ^ out7 ^ in5;
+ out0 = tmp2 ^ in6;
+ out1 = out0 ^ out6 ^ in7;
+ out3 = tmp1 ^ out6;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_5B(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in2 ^ in3;
+ tmp1 = in0 ^ in4;
+ tmp2 = in1 ^ in5;
+ out5 = tmp0 ^ tmp2;
+ tmp3 = tmp1 ^ in6;
+ out3 = tmp1 ^ in5;
+ out2 = tmp2 ^ in7;
+ tmp4 = out3 ^ in2;
+ out7 = out2 ^ in3 ^ in4;
+ out0 = tmp4 ^ in6;
+ out6 = tmp0 ^ tmp3;
+ out4 = tmp2 ^ tmp4;
+ out1 = tmp3 ^ out7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_5C(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in3 ^ in6;
+ tmp1 = in0 ^ in2 ^ in5;
+ out1 = tmp0 ^ in5;
+ tmp2 = tmp0 ^ in1;
+ out2 = tmp1 ^ in6;
+ out6 = tmp1 ^ in3;
+ out4 = tmp2 ^ in0;
+ out7 = tmp2 ^ in4;
+ out3 = tmp1 ^ out7;
+ out0 = out3 ^ out4 ^ in7;
+ out5 = out0 ^ in1 ^ in5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_5D(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in1;
+ tmp1 = in0 ^ in6;
+ out2 = tmp1 ^ in5;
+ tmp2 = out2 ^ in3;
+ out6 = tmp2 ^ in2;
+ out1 = tmp0 ^ tmp2;
+ tmp3 = out1 ^ in4 ^ in5;
+ out4 = tmp3 ^ in0;
+ out7 = tmp3 ^ in7;
+ tmp4 = out4 ^ out6;
+ out5 = tmp4 ^ in7;
+ out0 = tmp0 ^ out5;
+ out3 = tmp1 ^ tmp4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_5E(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in2 ^ in5;
+ tmp1 = in3 ^ in5;
+ tmp2 = in1 ^ in7;
+ out7 = in1 ^ in3 ^ in4;
+ out0 = tmp0 ^ in4;
+ tmp3 = tmp1 ^ in0;
+ out5 = tmp2 ^ in2;
+ out1 = tmp3 ^ in6;
+ out6 = tmp0 ^ tmp3;
+ tmp4 = tmp2 ^ out1;
+ out3 = tmp4 ^ in4;
+ out4 = tmp1 ^ tmp4;
+ out2 = tmp0 ^ out4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_5F(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in1 ^ in5;
+ tmp1 = in0 ^ in6;
+ tmp2 = tmp0 ^ in7;
+ tmp3 = tmp1 ^ in3;
+ out2 = tmp1 ^ tmp2;
+ out5 = tmp2 ^ in2;
+ out6 = tmp3 ^ in2;
+ out3 = out2 ^ in4;
+ out4 = out3 ^ in5;
+ out1 = tmp0 ^ tmp3;
+ out7 = tmp3 ^ out4;
+ out0 = out4 ^ out5 ^ in6;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_60(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out4 = in2 ^ in5;
+ tmp0 = in3 ^ in6;
+ out1 = in3 ^ in4 ^ in7;
+ out7 = out4 ^ in1;
+ tmp1 = out4 ^ in4;
+ out0 = tmp0 ^ in2;
+ out5 = tmp0 ^ in0;
+ out2 = tmp0 ^ tmp1;
+ out3 = tmp1 ^ in7;
+ out6 = out3 ^ out7 ^ in0;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_61(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in2 ^ in5;
+ out4 = tmp0 ^ in4;
+ tmp1 = out4 ^ in3;
+ out3 = tmp1 ^ in7;
+ out2 = tmp1 ^ in2 ^ in6;
+ out1 = tmp0 ^ out3 ^ in1;
+ out0 = out2 ^ out4 ^ in0;
+ out7 = tmp1 ^ out1;
+ out6 = out0 ^ out1 ^ in2;
+ out5 = tmp0 ^ out0;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_62(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out3 = in4 ^ in5;
+ tmp0 = in0 ^ in3 ^ in4;
+ out1 = tmp0 ^ in7;
+ out5 = tmp0 ^ in6;
+ tmp1 = out1 ^ in0;
+ tmp2 = tmp1 ^ out3;
+ out4 = tmp2 ^ in2;
+ tmp3 = tmp2 ^ in1;
+ out0 = out4 ^ in5 ^ in6;
+ out7 = tmp3 ^ out0;
+ out6 = tmp0 ^ tmp3;
+ out2 = tmp1 ^ out7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_63(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in3 ^ in4;
+ tmp1 = in1 ^ in7;
+ out3 = tmp0 ^ in5;
+ tmp2 = out3 ^ in6;
+ out4 = out3 ^ in2 ^ in7;
+ out5 = tmp2 ^ in0;
+ tmp3 = out5 ^ in3;
+ out0 = tmp3 ^ out4;
+ out2 = tmp1 ^ tmp2;
+ out6 = tmp1 ^ tmp3;
+ tmp4 = tmp0 ^ out2;
+ out1 = tmp4 ^ out5;
+ out7 = tmp4 ^ in2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_64(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out0 = in2 ^ in3;
+ out1 = in3 ^ in4;
+ out7 = in1 ^ in2;
+ tmp0 = in4 ^ in5;
+ tmp1 = in0 ^ in7;
+ out4 = in5 ^ in6 ^ in7;
+ out2 = tmp0 ^ out0 ^ in0;
+ out3 = tmp0 ^ out7 ^ in6;
+ out5 = tmp1 ^ in6;
+ out6 = tmp1 ^ in1;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_65(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in3;
+ tmp1 = in4 ^ in5;
+ tmp2 = in6 ^ in7;
+ out7 = in1 ^ in2 ^ in7;
+ out1 = in1 ^ in3 ^ in4;
+ out0 = tmp0 ^ in2;
+ out2 = tmp0 ^ tmp1;
+ out4 = tmp1 ^ tmp2;
+ tmp3 = tmp2 ^ in0;
+ out3 = out4 ^ out7 ^ in3;
+ out5 = tmp3 ^ in5;
+ out6 = tmp3 ^ in1;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_66(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in1 ^ in2;
+ tmp1 = in2 ^ in3;
+ tmp2 = in0 ^ in4;
+ out7 = tmp0 ^ in6;
+ out0 = tmp1 ^ in7;
+ out1 = tmp2 ^ in3;
+ tmp3 = tmp2 ^ in6;
+ tmp4 = out1 ^ in5;
+ out5 = tmp3 ^ in7;
+ out4 = tmp3 ^ tmp4;
+ out2 = tmp0 ^ tmp4 ^ in7;
+ out6 = tmp1 ^ out2 ^ in4;
+ out3 = tmp3 ^ out6;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_67(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in3;
+ tmp1 = tmp0 ^ in1;
+ tmp2 = tmp0 ^ in7;
+ out1 = tmp1 ^ in4;
+ out0 = tmp2 ^ in2;
+ tmp3 = out1 ^ in7;
+ out2 = tmp3 ^ in5;
+ out3 = out2 ^ in0 ^ in6;
+ out7 = tmp1 ^ out0 ^ in6;
+ out5 = tmp1 ^ out3;
+ out4 = tmp2 ^ out5;
+ out6 = tmp3 ^ out4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_68(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in3 ^ in4;
+ tmp1 = in2 ^ in3 ^ in5;
+ tmp2 = tmp0 ^ in1;
+ tmp3 = tmp0 ^ in6;
+ out0 = tmp1 ^ in6;
+ out6 = tmp2 ^ in0;
+ out7 = tmp1 ^ tmp2;
+ out1 = tmp3 ^ in7;
+ out2 = out1 ^ in2;
+ out4 = tmp2 ^ out2;
+ out3 = out4 ^ out6 ^ in3;
+ out5 = tmp3 ^ out3;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_69(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in6 ^ in7;
+ out2 = tmp0 ^ in3 ^ in4;
+ out1 = out2 ^ in1;
+ out3 = out2 ^ in0 ^ in2;
+ out4 = out1 ^ in2 ^ in3;
+ out6 = out1 ^ in0 ^ in7;
+ out7 = out4 ^ in5 ^ in6;
+ out5 = out4 ^ out6 ^ in5;
+ out0 = tmp0 ^ out5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_6A(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in2 ^ in6;
+ out3 = in0 ^ in4 ^ in6;
+ tmp1 = tmp0 ^ in3;
+ out4 = tmp1 ^ in1;
+ tmp2 = tmp1 ^ in7;
+ out2 = out4 ^ in4;
+ out0 = tmp2 ^ in5;
+ out5 = tmp2 ^ out3;
+ out7 = out2 ^ in3 ^ in5;
+ out1 = tmp0 ^ out5;
+ out6 = tmp1 ^ out7 ^ in0;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_6B(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in4 ^ in6;
+ out2 = tmp0 ^ in1 ^ in3;
+ out4 = out2 ^ in2;
+ tmp1 = out2 ^ in0;
+ out7 = out4 ^ in3 ^ in5 ^ in7;
+ out1 = tmp1 ^ in7;
+ out3 = tmp1 ^ in1;
+ out6 = tmp1 ^ in5;
+ out0 = tmp1 ^ out7 ^ in6;
+ out5 = tmp0 ^ out0;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_6C(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out4 = in1;
+ tmp0 = in2 ^ in3;
+ out5 = in0 ^ in2;
+ out1 = in3 ^ in4 ^ in6;
+ tmp1 = out5 ^ in1;
+ out0 = tmp0 ^ in5;
+ out6 = tmp0 ^ tmp1;
+ out3 = tmp1 ^ in4;
+ out7 = out3 ^ in0;
+ out2 = out6 ^ out7 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_6D(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out4 = in1 ^ in4;
+ tmp0 = in0 ^ in2;
+ tmp1 = out4 ^ in3;
+ out7 = out4 ^ in2 ^ in7;
+ out5 = tmp0 ^ in5;
+ out3 = tmp0 ^ tmp1;
+ out1 = tmp1 ^ in6;
+ out0 = out5 ^ in3;
+ out2 = out3 ^ out7 ^ in4;
+ out6 = out1 ^ in0 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_6E(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in1 ^ in3;
+ tmp1 = in0 ^ in4;
+ out4 = tmp0 ^ in7;
+ out6 = tmp0 ^ in0 ^ in5;
+ out5 = tmp1 ^ in2;
+ tmp2 = tmp1 ^ in3;
+ out3 = tmp2 ^ out4;
+ out1 = tmp2 ^ in6;
+ out2 = tmp0 ^ out5;
+ out0 = out2 ^ out3 ^ in5;
+ out7 = out1 ^ out2 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_6F(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in3 ^ in7;
+ tmp1 = tmp0 ^ in4;
+ tmp2 = tmp0 ^ in0 ^ in2;
+ out4 = tmp1 ^ in1;
+ out0 = tmp2 ^ in5;
+ out3 = out4 ^ in0;
+ out2 = out3 ^ in7;
+ out1 = out2 ^ in6;
+ out6 = out1 ^ in4 ^ in5;
+ out7 = tmp2 ^ out1;
+ out5 = tmp1 ^ out0;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_70(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out3 = in2;
+ tmp0 = in2 ^ in4;
+ out2 = in2 ^ in3 ^ in5;
+ tmp1 = tmp0 ^ in6;
+ tmp2 = out2 ^ in7;
+ out0 = tmp1 ^ in3;
+ out4 = tmp1 ^ in0;
+ out7 = tmp2 ^ in1;
+ out6 = out4 ^ in1;
+ out5 = out7 ^ in0 ^ in2;
+ out1 = tmp0 ^ tmp2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_71(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in3 ^ in5;
+ out3 = in2 ^ in3;
+ tmp0 = in0 ^ in2;
+ tmp1 = out2 ^ in1;
+ out4 = tmp0 ^ in6;
+ tmp2 = tmp0 ^ in1;
+ out7 = tmp1 ^ in2;
+ out1 = tmp1 ^ in4 ^ in7;
+ out0 = out4 ^ in3 ^ in4;
+ out6 = tmp2 ^ in4;
+ out5 = tmp2 ^ out3 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_72(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out3 = in7;
+ tmp0 = in0 ^ in4;
+ tmp1 = tmp0 ^ in3 ^ in7;
+ out1 = tmp1 ^ in5;
+ out5 = out1 ^ in1;
+ tmp2 = tmp0 ^ out5;
+ out2 = tmp2 ^ in2;
+ out7 = out2 ^ in6;
+ out6 = tmp1 ^ out7;
+ out4 = tmp2 ^ out6;
+ out0 = out4 ^ in0;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_73(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out3 = in3 ^ in7;
+ out2 = out3 ^ in1 ^ in5;
+ out1 = out2 ^ in0 ^ in4;
+ out5 = out1 ^ in5;
+ out6 = out1 ^ out3 ^ in2;
+ out0 = out2 ^ out6 ^ in6;
+ out7 = out0 ^ out1 ^ in3;
+ out4 = out0 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_74(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in3 ^ in4;
+ tmp1 = in1 ^ in2 ^ in6;
+ out4 = in0 ^ in4 ^ in7;
+ out5 = in0 ^ in1 ^ in5;
+ out0 = tmp0 ^ in2;
+ out1 = tmp0 ^ in5;
+ out3 = tmp1 ^ in7;
+ out6 = tmp1 ^ in0;
+ out2 = tmp1 ^ out5 ^ in3;
+ out7 = out3 ^ in3 ^ in6;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_75(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out4 = in0 ^ in7;
+ tmp0 = in1 ^ in3;
+ out5 = in0 ^ in1;
+ out7 = tmp0 ^ in2;
+ tmp1 = tmp0 ^ in4;
+ out6 = out5 ^ in2;
+ tmp2 = out7 ^ in6;
+ out1 = tmp1 ^ in5;
+ out0 = tmp1 ^ out6;
+ out3 = tmp2 ^ in7;
+ out2 = tmp2 ^ out6 ^ in5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_76(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out3 = in1 ^ in6;
+ tmp0 = in0 ^ in5;
+ tmp1 = in3 ^ in7;
+ tmp2 = tmp0 ^ in4;
+ tmp3 = tmp1 ^ in2;
+ out5 = tmp2 ^ in1;
+ out1 = tmp2 ^ in3;
+ out0 = tmp3 ^ in4;
+ out4 = out1 ^ in5;
+ out7 = tmp3 ^ out3;
+ out2 = tmp0 ^ out7;
+ out6 = tmp1 ^ out2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_77(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out4 = in0 ^ in3;
+ tmp0 = in1 ^ in4;
+ tmp1 = in1 ^ in6;
+ tmp2 = out4 ^ in5;
+ out5 = tmp0 ^ in0;
+ out1 = tmp0 ^ tmp2;
+ out3 = tmp1 ^ in3;
+ out2 = tmp1 ^ tmp2 ^ in7;
+ out7 = out3 ^ in2;
+ tmp3 = out7 ^ in6;
+ out6 = tmp2 ^ tmp3;
+ out0 = tmp3 ^ out5 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_78(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in3;
+ tmp1 = in2 ^ in7;
+ tmp2 = in0 ^ in5 ^ in6;
+ out2 = tmp1 ^ in3;
+ out3 = tmp2 ^ in2;
+ out5 = out3 ^ in1 ^ in3;
+ out0 = tmp0 ^ out3 ^ in4;
+ out1 = tmp1 ^ out0;
+ out4 = out1 ^ out5 ^ in5;
+ out7 = tmp0 ^ out4;
+ out6 = tmp2 ^ out7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_79(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in3 ^ in7;
+ tmp0 = in3 ^ in4;
+ tmp1 = in1 ^ in5;
+ tmp2 = tmp1 ^ in2;
+ out4 = tmp2 ^ in0 ^ in7;
+ tmp3 = out4 ^ in5;
+ out5 = tmp3 ^ out2 ^ in6;
+ out7 = tmp0 ^ tmp2;
+ out6 = tmp0 ^ tmp3;
+ out3 = tmp1 ^ out5;
+ out0 = out3 ^ in4;
+ out1 = tmp3 ^ out0;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_7A(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in1 ^ in2;
+ out2 = tmp0 ^ in3;
+ tmp1 = out2 ^ in4;
+ out4 = tmp1 ^ in0 ^ in5;
+ out5 = out4 ^ in6;
+ out6 = out5 ^ in7;
+ out7 = out6 ^ in0;
+ out0 = out7 ^ in1;
+ out1 = tmp0 ^ out6;
+ out3 = tmp1 ^ out6;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_7B(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in1 ^ in3;
+ tmp0 = in0 ^ in5;
+ out4 = tmp0 ^ out2 ^ in2;
+ tmp1 = out4 ^ in4;
+ out6 = tmp1 ^ in7;
+ out5 = tmp1 ^ in5 ^ in6;
+ out0 = out6 ^ in1 ^ in6;
+ tmp2 = out0 ^ in2;
+ out1 = tmp2 ^ in1;
+ out3 = tmp2 ^ in4;
+ out7 = tmp0 ^ out5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_7C(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in3 ^ in5;
+ tmp1 = tmp0 ^ in4;
+ out0 = tmp1 ^ in2;
+ out1 = tmp1 ^ in6;
+ out7 = out0 ^ in1 ^ in5 ^ in7;
+ out5 = out1 ^ out7 ^ in0;
+ out3 = out5 ^ in6;
+ out6 = tmp0 ^ out5;
+ out2 = out6 ^ in1;
+ out4 = out2 ^ out7 ^ in5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_7D(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in1 ^ in2;
+ tmp1 = tmp0 ^ in3;
+ tmp2 = tmp0 ^ in6;
+ out7 = tmp1 ^ in4;
+ tmp3 = tmp2 ^ in0;
+ out5 = tmp3 ^ in7;
+ out4 = tmp3 ^ in2 ^ in5;
+ out2 = tmp1 ^ out5;
+ out6 = tmp2 ^ out2;
+ out0 = out4 ^ out7 ^ in6;
+ out1 = tmp3 ^ out0;
+ out3 = out6 ^ in5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_7E(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in3 ^ in4;
+ tmp1 = in0 ^ in5;
+ out1 = tmp0 ^ tmp1 ^ in6;
+ out3 = tmp1 ^ in1;
+ out4 = out1 ^ in1 ^ in7;
+ tmp2 = out4 ^ in3;
+ out5 = tmp2 ^ in2;
+ out6 = tmp0 ^ out5;
+ out7 = tmp1 ^ out4 ^ in2;
+ out2 = out6 ^ in5 ^ in7;
+ out0 = tmp2 ^ out2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_7F(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in2 ^ in7;
+ tmp1 = tmp0 ^ in3 ^ in5;
+ tmp2 = tmp1 ^ in0;
+ out0 = tmp2 ^ in4;
+ out6 = tmp2 ^ in1;
+ out3 = tmp0 ^ out6;
+ tmp3 = out3 ^ in6;
+ out1 = tmp3 ^ in4;
+ out2 = tmp3 ^ in5;
+ out4 = tmp3 ^ in7;
+ out5 = tmp1 ^ out1;
+ out7 = out0 ^ out4 ^ in3;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_80(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in2 ^ in3;
+ tmp1 = in4 ^ in5;
+ out1 = in2 ^ in6 ^ in7;
+ out5 = tmp0 ^ in4;
+ tmp2 = tmp0 ^ in1;
+ out6 = tmp1 ^ in3;
+ out7 = tmp1 ^ in0 ^ in6;
+ out4 = tmp2 ^ in7;
+ out3 = tmp2 ^ out6;
+ out2 = out3 ^ out5 ^ in6;
+ out0 = out2 ^ in3 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_81(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in4 ^ in6;
+ tmp1 = tmp0 ^ in3;
+ out6 = tmp1 ^ in5;
+ out5 = out6 ^ in2 ^ in6;
+ out3 = out5 ^ in1;
+ out2 = tmp0 ^ out3;
+ out1 = out3 ^ out6 ^ in7;
+ out4 = tmp1 ^ out1;
+ out7 = out2 ^ out4 ^ in0;
+ out0 = out7 ^ in1 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_82(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out4 = in1 ^ in2;
+ tmp0 = in6 ^ in7;
+ out5 = in2 ^ in3;
+ out6 = in3 ^ in4;
+ out7 = in0 ^ in4 ^ in5;
+ out0 = in1 ^ in5 ^ in6;
+ out1 = tmp0 ^ in0 ^ in2;
+ out2 = tmp0 ^ in3 ^ in5;
+ out3 = tmp0 ^ out0 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_83(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in1;
+ tmp1 = in2 ^ in5;
+ tmp2 = in3 ^ in6;
+ out4 = in1 ^ in2 ^ in4;
+ out0 = tmp0 ^ in5 ^ in6;
+ out5 = tmp1 ^ in3;
+ tmp3 = tmp1 ^ in7;
+ out6 = tmp2 ^ in4;
+ out2 = tmp2 ^ tmp3;
+ tmp4 = tmp3 ^ out4;
+ out1 = tmp3 ^ out0;
+ out3 = tmp4 ^ in3;
+ out7 = tmp0 ^ tmp4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_84(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out1 = in2 ^ in6;
+ out6 = in3 ^ in5;
+ out0 = in1 ^ in5 ^ in7;
+ out7 = in0 ^ in4 ^ in6;
+ out4 = in1 ^ in3 ^ in6;
+ out5 = in2 ^ in4 ^ in7;
+ out2 = out6 ^ in0 ^ in1;
+ out3 = out5 ^ in5 ^ in6;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_85(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in1 ^ in6;
+ tmp1 = in3 ^ in6;
+ tmp2 = tmp0 ^ in4;
+ out1 = tmp0 ^ in2;
+ out6 = tmp1 ^ in5;
+ out4 = tmp2 ^ in3;
+ tmp3 = out1 ^ out6;
+ out2 = tmp3 ^ in0;
+ out3 = tmp2 ^ tmp3 ^ in7;
+ out7 = out2 ^ out3 ^ in1;
+ out5 = tmp1 ^ out3;
+ out0 = tmp2 ^ out7 ^ in5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_86(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out6 = in3;
+ out7 = in0 ^ in4;
+ out0 = in1 ^ in5;
+ out5 = in2 ^ in7;
+ out3 = in4 ^ in5 ^ in6;
+ out1 = in0 ^ in2 ^ in6;
+ out4 = in1 ^ in6 ^ in7;
+ out2 = in0 ^ in3 ^ in5 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_87(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out6 = in3 ^ in6;
+ tmp0 = in0 ^ in1;
+ out7 = in0 ^ in4 ^ in7;
+ out5 = in2 ^ in5 ^ in7;
+ out3 = out6 ^ in4 ^ in5;
+ out0 = tmp0 ^ in5;
+ tmp1 = tmp0 ^ in6;
+ out2 = out5 ^ in0 ^ in3;
+ out1 = tmp1 ^ in2;
+ out4 = tmp1 ^ out7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_88(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out1 = in2 ^ in7;
+ tmp0 = in5 ^ in6;
+ out0 = in1 ^ in6 ^ in7;
+ out6 = in4 ^ in5 ^ in7;
+ out3 = out0 ^ out1 ^ in0 ^ in4;
+ out7 = tmp0 ^ in0;
+ tmp1 = tmp0 ^ in3;
+ out2 = out0 ^ in3;
+ out4 = tmp1 ^ in2;
+ out5 = tmp1 ^ out6;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_89(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in7;
+ tmp1 = in2 ^ in7;
+ tmp2 = tmp0 ^ in6;
+ out1 = tmp1 ^ in1;
+ out7 = tmp2 ^ in5;
+ out0 = tmp2 ^ in1;
+ out2 = out1 ^ in3 ^ in6;
+ out6 = out7 ^ in0 ^ in4;
+ out5 = out6 ^ in3;
+ out3 = tmp0 ^ out2 ^ in4;
+ out4 = tmp1 ^ out5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_8A(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out0 = in1 ^ in6;
+ out7 = in0 ^ in5;
+ out2 = in3 ^ in6;
+ out6 = in4 ^ in7;
+ out1 = in0 ^ in2 ^ in7;
+ out3 = out0 ^ out6 ^ in0;
+ out4 = out1 ^ out7 ^ in6;
+ out5 = out2 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_8B(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in1;
+ tmp1 = in3 ^ in6;
+ tmp2 = in5 ^ in7;
+ tmp3 = tmp0 ^ in7;
+ out0 = tmp0 ^ in6;
+ out2 = tmp1 ^ in2;
+ out5 = tmp1 ^ tmp2;
+ out7 = tmp2 ^ in0;
+ tmp4 = tmp3 ^ in4;
+ out1 = tmp3 ^ in2;
+ out6 = tmp4 ^ out0;
+ out4 = out6 ^ in2 ^ in5;
+ out3 = tmp1 ^ tmp4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_8C(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out1 = in2;
+ out0 = in1 ^ in7;
+ out7 = in0 ^ in6;
+ out5 = in4 ^ in6;
+ out6 = in5 ^ in7;
+ out2 = out0 ^ in0 ^ in3;
+ out3 = out5 ^ out7 ^ in2 ^ in7;
+ out4 = out6 ^ in3;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_8D(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out1 = in1 ^ in2;
+ tmp0 = in6 ^ in7;
+ out0 = in0 ^ in1 ^ in7;
+ out5 = in4 ^ in5 ^ in6;
+ out6 = tmp0 ^ in5;
+ out7 = tmp0 ^ in0;
+ out4 = tmp0 ^ out5 ^ in3;
+ out2 = out0 ^ in2 ^ in3;
+ out3 = out2 ^ in1 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_8E(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out0 = in1;
+ out4 = in5;
+ out7 = in0;
+ out5 = in6;
+ out6 = in7;
+ out3 = in0 ^ in4;
+ out1 = in0 ^ in2;
+ out2 = in0 ^ in3;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_8F(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out0 = in0 ^ in1;
+ tmp0 = in0 ^ in3;
+ out4 = in4 ^ in5;
+ out7 = in0 ^ in7;
+ out5 = in5 ^ in6;
+ out6 = in6 ^ in7;
+ out1 = out0 ^ in2;
+ out2 = tmp0 ^ in2;
+ out3 = tmp0 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_90(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in1 ^ in2;
+ tmp1 = in2 ^ in6 ^ in7;
+ out3 = tmp0 ^ in7;
+ out1 = tmp1 ^ in5;
+ tmp2 = out1 ^ in4;
+ out6 = tmp2 ^ in3;
+ out5 = out6 ^ in1;
+ out4 = out5 ^ in0;
+ out0 = tmp0 ^ tmp2;
+ out7 = tmp0 ^ out4;
+ out2 = tmp1 ^ out5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_91(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in2 ^ in4;
+ tmp1 = tmp0 ^ in3 ^ in5;
+ out2 = tmp1 ^ in1;
+ out6 = tmp1 ^ in7;
+ tmp2 = out2 ^ in5 ^ in7;
+ out3 = tmp2 ^ in4;
+ out5 = tmp2 ^ in6;
+ out1 = tmp1 ^ out5 ^ in2;
+ tmp3 = out1 ^ in0;
+ out4 = tmp3 ^ in3;
+ out0 = tmp0 ^ tmp3;
+ out7 = tmp2 ^ tmp3;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_92(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out3 = in1;
+ tmp0 = in4 ^ in5;
+ tmp1 = tmp0 ^ in1;
+ out2 = tmp0 ^ in3 ^ in7;
+ out0 = tmp1 ^ in6;
+ out7 = out2 ^ in0;
+ out4 = out0 ^ in0 ^ in2;
+ out5 = out4 ^ out7 ^ in5;
+ out6 = tmp1 ^ out5;
+ out1 = out6 ^ out7 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_93(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out3 = in1 ^ in3;
+ tmp0 = in2 ^ in7;
+ tmp1 = out3 ^ in6;
+ tmp2 = tmp0 ^ in4;
+ out5 = tmp0 ^ tmp1;
+ out6 = tmp2 ^ in3;
+ out2 = out6 ^ in5;
+ out0 = out2 ^ out5 ^ in0;
+ out7 = tmp1 ^ out0;
+ out1 = tmp2 ^ out0;
+ out4 = out1 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_94(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out3 = in2 ^ in6;
+ tmp0 = in1 ^ in4 ^ in5;
+ out1 = out3 ^ in5;
+ out5 = tmp0 ^ out3;
+ out0 = tmp0 ^ in7;
+ out4 = tmp0 ^ in0 ^ in3;
+ out6 = out1 ^ in3 ^ in7;
+ out2 = out4 ^ in6;
+ out7 = out0 ^ out2 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_95(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in2 ^ in3;
+ out3 = tmp0 ^ in6;
+ tmp1 = tmp0 ^ in7;
+ tmp2 = out3 ^ in0;
+ out6 = tmp1 ^ in5;
+ tmp3 = tmp2 ^ in4;
+ out7 = tmp3 ^ in2;
+ tmp4 = tmp3 ^ in5;
+ out2 = tmp4 ^ in1;
+ tmp5 = out2 ^ in6;
+ out0 = tmp1 ^ tmp5;
+ out1 = tmp5 ^ out7;
+ out4 = tmp2 ^ out1;
+ out5 = tmp4 ^ out4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_96(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out3 = in6 ^ in7;
+ tmp0 = in1 ^ in5;
+ tmp1 = in5 ^ in6;
+ out6 = out3 ^ in2 ^ in3;
+ out0 = tmp0 ^ in4;
+ tmp2 = tmp1 ^ in2;
+ out4 = out0 ^ in0 ^ in7;
+ out1 = tmp2 ^ in0;
+ out5 = tmp2 ^ in1;
+ out7 = tmp0 ^ out4 ^ in3;
+ out2 = tmp1 ^ out7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_97(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in4;
+ tmp1 = in2 ^ in6;
+ out3 = in3 ^ in6 ^ in7;
+ out7 = tmp0 ^ in3;
+ tmp2 = tmp0 ^ in5;
+ out5 = tmp1 ^ in1;
+ out6 = tmp1 ^ out3;
+ out0 = tmp2 ^ in1;
+ out2 = tmp2 ^ out3 ^ in2;
+ tmp3 = out0 ^ in4;
+ out4 = tmp3 ^ in7;
+ out1 = tmp1 ^ tmp3;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_98(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in5 ^ in7;
+ tmp1 = in1 ^ in4 ^ in7;
+ out1 = tmp0 ^ in2;
+ out0 = tmp1 ^ in6;
+ out2 = tmp1 ^ in3;
+ out6 = out0 ^ out1 ^ in1;
+ out5 = tmp0 ^ out2;
+ out3 = tmp1 ^ out6 ^ in0;
+ out7 = out0 ^ out5 ^ in0;
+ out4 = out6 ^ out7 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_99(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in3;
+ out5 = in1 ^ in3 ^ in4;
+ out6 = in2 ^ in4 ^ in5;
+ out4 = tmp0 ^ in2;
+ tmp1 = tmp0 ^ in6;
+ tmp2 = out5 ^ in7;
+ out7 = tmp1 ^ in5;
+ out0 = tmp1 ^ tmp2;
+ out2 = tmp2 ^ in2;
+ out3 = out0 ^ out6 ^ in3;
+ out1 = tmp1 ^ out3;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_9A(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in3 ^ in4;
+ tmp0 = in0 ^ in5;
+ tmp1 = in1 ^ in6;
+ out5 = in1 ^ in3 ^ in5;
+ tmp2 = tmp0 ^ in7;
+ out3 = tmp0 ^ tmp1;
+ out0 = tmp1 ^ in4;
+ out7 = tmp2 ^ in3;
+ out1 = tmp2 ^ in2;
+ out6 = out0 ^ in1 ^ in2;
+ out4 = out1 ^ in4 ^ in5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_9B(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out5 = in1 ^ in3;
+ tmp0 = in3 ^ in5;
+ out6 = in2 ^ in4;
+ out4 = in0 ^ in2 ^ in7;
+ out7 = tmp0 ^ in0;
+ out2 = out6 ^ in3;
+ out1 = out4 ^ in1 ^ in5;
+ out3 = out7 ^ in1 ^ in6;
+ out0 = tmp0 ^ out3 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_9C(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out1 = in2 ^ in5;
+ tmp0 = in0 ^ in3 ^ in6;
+ out3 = out1 ^ in0;
+ out6 = out1 ^ in6;
+ out7 = tmp0 ^ in7;
+ out4 = out7 ^ in4;
+ out2 = out4 ^ in1;
+ out0 = tmp0 ^ out2;
+ out5 = out0 ^ in5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_9D(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out6 = in2 ^ in5;
+ tmp0 = in0 ^ in3;
+ out5 = in1 ^ in4 ^ in7;
+ out1 = out6 ^ in1;
+ out3 = tmp0 ^ out6;
+ out7 = tmp0 ^ in6;
+ out0 = out5 ^ in0;
+ out4 = out7 ^ in7;
+ out2 = out5 ^ out7 ^ in2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_9E(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out0 = in1 ^ in4;
+ tmp0 = in0 ^ in5;
+ out6 = in2 ^ in6;
+ out7 = in0 ^ in3 ^ in7;
+ out4 = in0 ^ in4 ^ in6;
+ out5 = in1 ^ in5 ^ in7;
+ out1 = tmp0 ^ in2;
+ out3 = tmp0 ^ in7;
+ out2 = out4 ^ in3;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_9F(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out6 = in2;
+ out7 = in0 ^ in3;
+ tmp0 = in0 ^ in1;
+ out4 = in0 ^ in6;
+ out5 = in1 ^ in7;
+ out1 = tmp0 ^ in2 ^ in5;
+ out2 = out7 ^ in2 ^ in4 ^ in6;
+ out3 = out7 ^ in5 ^ in7;
+ out0 = tmp0 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_A0(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in1 ^ in6;
+ out2 = tmp0 ^ in7;
+ tmp1 = tmp0 ^ in5;
+ out6 = out2 ^ in3 ^ in4;
+ out0 = tmp1 ^ in3;
+ tmp2 = out0 ^ in2;
+ out3 = tmp2 ^ in7;
+ tmp3 = tmp2 ^ in1;
+ out5 = tmp3 ^ in0;
+ out4 = tmp3 ^ out6;
+ out7 = out5 ^ out6 ^ in1;
+ out1 = tmp1 ^ out4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_A1(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in2 ^ in5;
+ tmp1 = tmp0 ^ in1;
+ tmp2 = tmp0 ^ in4;
+ out4 = tmp1 ^ in7;
+ out7 = tmp2 ^ in0;
+ out6 = tmp2 ^ out4 ^ in3;
+ out3 = out4 ^ in6;
+ out2 = out3 ^ in5;
+ out1 = out2 ^ in4;
+ out5 = out1 ^ out6 ^ in0;
+ out0 = tmp1 ^ out5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_A2(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in6;
+ tmp0 = in1 ^ in3 ^ in5;
+ out3 = tmp0 ^ in6;
+ out4 = tmp0 ^ in2 ^ in4;
+ out0 = out3 ^ in7;
+ out6 = out0 ^ in4;
+ out1 = out0 ^ out4 ^ in0;
+ out7 = out1 ^ in5;
+ out5 = out7 ^ in3 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_A3(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in2 ^ in6;
+ out3 = in1 ^ in5 ^ in6;
+ tmp0 = out2 ^ in0;
+ out4 = out2 ^ out3 ^ in3;
+ tmp1 = tmp0 ^ in4;
+ out0 = tmp0 ^ out4 ^ in7;
+ out5 = tmp1 ^ in3;
+ out7 = tmp1 ^ in5;
+ out1 = tmp1 ^ in1 ^ in7;
+ out6 = tmp1 ^ out0 ^ in2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_A4(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in1 ^ in3;
+ tmp1 = in2 ^ in4;
+ tmp2 = in2 ^ in5;
+ tmp3 = in0 ^ in7;
+ out0 = tmp0 ^ in5;
+ out6 = tmp0 ^ in6 ^ in7;
+ out1 = tmp1 ^ in6;
+ out7 = tmp1 ^ tmp3;
+ out3 = tmp2 ^ in3;
+ tmp4 = tmp2 ^ out1;
+ out2 = tmp3 ^ in1;
+ out5 = tmp4 ^ out7;
+ out4 = tmp4 ^ in1;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_A5(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out3 = in2 ^ in5;
+ tmp0 = in1 ^ in6;
+ tmp1 = in0 ^ in1;
+ tmp2 = in2 ^ in4;
+ out6 = in1 ^ in3 ^ in7;
+ out4 = tmp0 ^ in5;
+ out1 = tmp0 ^ tmp2;
+ out0 = tmp1 ^ in3 ^ in5;
+ out2 = tmp1 ^ in2 ^ in7;
+ out7 = tmp2 ^ in0;
+ out5 = tmp0 ^ out2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_A6(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in0;
+ out3 = in3 ^ in5 ^ in7;
+ out1 = in0 ^ in2 ^ in4 ^ in6;
+ out0 = out3 ^ in1;
+ out7 = out1 ^ in7;
+ out6 = out0 ^ in6;
+ out5 = out7 ^ in5;
+ out4 = out6 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_A7(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in0 ^ in2;
+ out3 = in5 ^ in7;
+ out7 = out2 ^ in4 ^ in6;
+ out6 = out3 ^ in1 ^ in3;
+ out1 = out7 ^ in1;
+ out5 = out7 ^ in7;
+ out0 = out6 ^ in0;
+ out4 = out6 ^ in6;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_A8(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in2 ^ in4;
+ tmp1 = in1 ^ in6;
+ tmp2 = in0 ^ in2 ^ in7;
+ out1 = tmp0 ^ in7;
+ out4 = tmp0 ^ in6;
+ out0 = tmp1 ^ in3;
+ out2 = tmp1 ^ in5;
+ out6 = tmp1 ^ in4;
+ out7 = tmp2 ^ in5;
+ out3 = tmp2 ^ out0 ^ in6;
+ out5 = out7 ^ in2 ^ in3;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_A9(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out4 = in2 ^ in6;
+ out6 = in1 ^ in4;
+ out7 = in0 ^ in2 ^ in5;
+ out5 = in0 ^ in3 ^ in7;
+ out2 = out4 ^ in1 ^ in5;
+ out1 = out6 ^ in2 ^ in7;
+ out0 = out2 ^ out7 ^ in3;
+ out3 = out1 ^ in0 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_AA(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in2;
+ tmp1 = in1 ^ in3;
+ tmp2 = in6 ^ in7;
+ out1 = tmp0 ^ in4 ^ in7;
+ out3 = tmp1 ^ in0;
+ out0 = tmp1 ^ tmp2;
+ out2 = tmp2 ^ in5;
+ out7 = tmp0 ^ out2;
+ out6 = out1 ^ out7 ^ in1;
+ out5 = out0 ^ out6 ^ in0;
+ out4 = out5 ^ out7 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_AB(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out3 = in0 ^ in1;
+ tmp0 = in1 ^ in4;
+ tmp1 = in0 ^ in7;
+ out6 = tmp0 ^ in5;
+ out1 = tmp0 ^ tmp1 ^ in2;
+ out5 = tmp1 ^ in3 ^ in4;
+ out0 = tmp0 ^ out5 ^ in6;
+ out4 = out0 ^ out3 ^ in2;
+ out2 = out4 ^ in3 ^ in5;
+ out7 = tmp1 ^ out2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_AC(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out0 = in1 ^ in3;
+ out1 = in2 ^ in4;
+ tmp0 = in0 ^ in2;
+ out4 = in4 ^ in7;
+ out5 = in0 ^ in5;
+ out6 = in1 ^ in6;
+ out7 = tmp0 ^ in7;
+ out3 = tmp0 ^ in3 ^ in6;
+ out2 = out5 ^ in1;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_AD(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out4 = in7;
+ out5 = in0;
+ out6 = in1;
+ out7 = in0 ^ in2;
+ out0 = in0 ^ in1 ^ in3;
+ out2 = out7 ^ in1 ^ in5;
+ out1 = in1 ^ in2 ^ in4;
+ out3 = out7 ^ in6;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_AE(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out4 = in3 ^ in4;
+ tmp0 = in0 ^ in4;
+ tmp1 = in0 ^ in7;
+ out0 = in1 ^ in3 ^ in7;
+ out1 = tmp0 ^ in2;
+ out5 = tmp0 ^ in5;
+ tmp2 = tmp1 ^ in6;
+ out2 = tmp1 ^ in5;
+ out3 = tmp2 ^ in3;
+ out7 = tmp2 ^ in2;
+ out6 = tmp2 ^ out2 ^ in1;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_AF(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out4 = in3;
+ tmp0 = in0 ^ in7;
+ out5 = in0 ^ in4;
+ out6 = in1 ^ in5;
+ out7 = in0 ^ in2 ^ in6;
+ out0 = tmp0 ^ in1 ^ in3;
+ out3 = tmp0 ^ in6;
+ out2 = tmp0 ^ in2 ^ in5;
+ out1 = out5 ^ in1 ^ in2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_B0(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in1 ^ in4;
+ tmp1 = in3 ^ in6;
+ out2 = tmp0 ^ in7;
+ tmp2 = tmp0 ^ tmp1;
+ out0 = tmp2 ^ in5;
+ out3 = tmp2 ^ in2;
+ out6 = out3 ^ in6;
+ tmp3 = out6 ^ in0 ^ in1;
+ out7 = tmp3 ^ in5;
+ out5 = tmp3 ^ out2;
+ out1 = out0 ^ out5 ^ in0;
+ out4 = tmp1 ^ out5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_B1(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in1 ^ in4;
+ out2 = tmp0 ^ in2 ^ in7;
+ tmp1 = out2 ^ in6;
+ out1 = tmp1 ^ in5;
+ out3 = tmp1 ^ in7;
+ out4 = tmp1 ^ in0;
+ out6 = out3 ^ in3;
+ out0 = out6 ^ in0 ^ in2 ^ in5;
+ out5 = tmp1 ^ out0 ^ in1;
+ out7 = tmp0 ^ out5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_B2(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in4;
+ tmp0 = in4 ^ in7;
+ tmp1 = in1 ^ in3 ^ in6;
+ out3 = tmp0 ^ tmp1;
+ tmp2 = tmp1 ^ in0;
+ out0 = out3 ^ in5;
+ out4 = tmp2 ^ in2;
+ tmp3 = out4 ^ in6;
+ out5 = tmp0 ^ tmp3;
+ out1 = tmp3 ^ out0;
+ tmp4 = out1 ^ in7;
+ out7 = tmp4 ^ in3;
+ out6 = tmp2 ^ tmp4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_B3(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in2 ^ in4;
+ tmp0 = in0 ^ in5;
+ tmp1 = in1 ^ in6;
+ out3 = tmp1 ^ in4 ^ in7;
+ tmp2 = tmp0 ^ out3;
+ out0 = tmp2 ^ in3;
+ out1 = tmp2 ^ in2;
+ out5 = out0 ^ in2 ^ in6;
+ out7 = tmp1 ^ out5;
+ out4 = out7 ^ in1 ^ in5 ^ in7;
+ out6 = tmp0 ^ out4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_B4(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out4 = in0 ^ in1;
+ out5 = out4 ^ in2;
+ tmp0 = out4 ^ in4;
+ out6 = out5 ^ in0 ^ in3;
+ out7 = tmp0 ^ out6;
+ out2 = tmp0 ^ in6 ^ in7;
+ out3 = out7 ^ in0 ^ in7;
+ out0 = out5 ^ out7 ^ in5;
+ out1 = out0 ^ out6 ^ in6;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_B5(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in1;
+ tmp1 = in2 ^ in4;
+ out4 = tmp0 ^ in4;
+ out3 = tmp1 ^ in7;
+ tmp2 = out4 ^ in5;
+ out7 = out3 ^ in0 ^ in3;
+ out0 = tmp2 ^ in3;
+ out2 = tmp0 ^ out3 ^ in6;
+ out5 = tmp1 ^ tmp2;
+ out6 = out2 ^ out7 ^ in2;
+ out1 = tmp0 ^ out0 ^ out6;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_B6(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out3 = in3 ^ in4;
+ tmp0 = in1 ^ in2;
+ tmp1 = in0 ^ in4;
+ tmp2 = in3 ^ in5;
+ tmp3 = out3 ^ in1 ^ in7;
+ out5 = tmp0 ^ tmp1;
+ out6 = tmp0 ^ tmp2;
+ out2 = tmp1 ^ in6;
+ out4 = tmp1 ^ tmp3;
+ out0 = tmp3 ^ in5;
+ out1 = out2 ^ in2 ^ in5;
+ out7 = tmp2 ^ out1;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_B7(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out3 = in4;
+ tmp0 = in0 ^ in4;
+ out2 = tmp0 ^ in2 ^ in6;
+ tmp1 = out2 ^ in7;
+ out1 = out2 ^ in1 ^ in5;
+ out7 = tmp1 ^ in3;
+ out5 = out1 ^ in6;
+ out6 = tmp0 ^ out1 ^ in3;
+ out0 = tmp1 ^ out6;
+ out4 = out0 ^ in5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_B8(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in1 ^ in4;
+ tmp1 = in2 ^ in5;
+ out2 = tmp0 ^ in5;
+ out4 = tmp1 ^ in0;
+ tmp2 = tmp1 ^ in7;
+ out6 = tmp2 ^ out2;
+ out7 = out4 ^ in3;
+ out1 = tmp2 ^ in4;
+ out3 = tmp0 ^ out7;
+ out0 = out3 ^ out4 ^ in6;
+ out5 = out0 ^ in0 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_B9(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in2;
+ tmp1 = in4 ^ in5;
+ out4 = tmp0 ^ tmp1;
+ tmp2 = tmp0 ^ in3 ^ in7;
+ out3 = out4 ^ in1;
+ out7 = tmp2 ^ in5;
+ out2 = out3 ^ in0;
+ out1 = out2 ^ in7;
+ out6 = out1 ^ in5 ^ in6;
+ out0 = tmp2 ^ out6;
+ out5 = tmp1 ^ out0;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_BA(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in5 ^ in7;
+ out2 = tmp0 ^ in4;
+ tmp1 = out2 ^ in2;
+ out1 = tmp1 ^ in0;
+ out6 = tmp1 ^ in1;
+ out4 = out1 ^ in3 ^ in4;
+ tmp2 = out4 ^ out6;
+ out7 = out4 ^ in6 ^ in7;
+ out5 = tmp2 ^ in6;
+ out3 = tmp0 ^ tmp2;
+ out0 = out6 ^ out7 ^ in0;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_BB(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in2 ^ in4 ^ in5 ^ in7;
+ tmp0 = out2 ^ in1;
+ out4 = out2 ^ in0 ^ in3;
+ out1 = tmp0 ^ in0;
+ out6 = tmp0 ^ in6;
+ out3 = out1 ^ in2;
+ tmp1 = out4 ^ out6 ^ in4;
+ out0 = tmp1 ^ in7;
+ out5 = tmp1 ^ in5;
+ out7 = tmp0 ^ tmp1;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_BC(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in2;
+ tmp1 = in2 ^ in4;
+ out0 = in1 ^ in3 ^ in4;
+ out6 = in1 ^ in2 ^ in7;
+ out7 = tmp0 ^ in3;
+ out5 = tmp0 ^ out6 ^ in6;
+ out1 = tmp1 ^ in5;
+ tmp2 = out1 ^ out5 ^ in1;
+ out3 = tmp2 ^ in3;
+ out4 = tmp1 ^ tmp2;
+ out2 = tmp2 ^ out6;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_BD(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in3;
+ tmp1 = in1 ^ in4;
+ out0 = tmp0 ^ tmp1;
+ out7 = tmp0 ^ in2 ^ in7;
+ out1 = tmp1 ^ in2 ^ in5;
+ tmp2 = out1 ^ in0;
+ out2 = tmp2 ^ in6;
+ out3 = out2 ^ in1 ^ in7;
+ out4 = out3 ^ in2;
+ out5 = tmp1 ^ out4;
+ out6 = tmp2 ^ out4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_BE(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in3 ^ in6;
+ out4 = tmp0 ^ in5;
+ out7 = tmp0 ^ in2;
+ out3 = out4 ^ in4;
+ out1 = out3 ^ out7 ^ in0;
+ out2 = out3 ^ in3 ^ in7;
+ out0 = out2 ^ out4 ^ in1;
+ out5 = tmp0 ^ out0;
+ out6 = out1 ^ out5 ^ in6;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_BF(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in4;
+ out3 = tmp0 ^ in5 ^ in6;
+ out4 = out3 ^ in3;
+ tmp1 = out3 ^ in7;
+ out2 = tmp1 ^ in2;
+ out5 = tmp1 ^ in1;
+ tmp2 = out2 ^ in5;
+ out7 = tmp2 ^ in3 ^ in4;
+ tmp3 = tmp0 ^ out5;
+ out0 = tmp3 ^ out4;
+ out1 = tmp2 ^ tmp3;
+ out6 = tmp3 ^ in2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_C0(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out5 = in2 ^ in5;
+ tmp0 = in1 ^ in4;
+ tmp1 = in3 ^ in6;
+ out0 = out5 ^ in1;
+ out4 = tmp0 ^ in7;
+ out3 = tmp0 ^ tmp1;
+ out1 = tmp1 ^ in2;
+ out6 = tmp1 ^ in0;
+ out7 = out4 ^ in0;
+ out2 = out4 ^ out5 ^ in3;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_C1(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out5 = in2;
+ tmp0 = in0 ^ in1;
+ out4 = in1 ^ in7;
+ out6 = in0 ^ in3;
+ out3 = in1 ^ in4 ^ in6;
+ tmp1 = tmp0 ^ in2;
+ out7 = tmp0 ^ in4;
+ out0 = tmp1 ^ in5;
+ out1 = tmp1 ^ out6 ^ in6;
+ out2 = out6 ^ out7 ^ in5 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_C2(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out4 = in1 ^ in3 ^ in4;
+ tmp0 = in0 ^ in3 ^ in6;
+ out5 = in2 ^ in4 ^ in5;
+ tmp1 = out4 ^ in7;
+ out1 = tmp0 ^ in2;
+ out6 = tmp0 ^ in5;
+ out2 = out5 ^ in3;
+ out7 = tmp0 ^ tmp1;
+ out3 = tmp1 ^ in2 ^ in6;
+ out0 = tmp1 ^ out2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_C3(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out4 = in1 ^ in3;
+ tmp0 = in0 ^ in2;
+ tmp1 = in3 ^ in5;
+ out5 = in2 ^ in4;
+ tmp2 = tmp0 ^ out4;
+ out2 = tmp1 ^ in4;
+ out6 = tmp1 ^ in0;
+ out0 = tmp1 ^ tmp2 ^ in7;
+ out1 = tmp2 ^ in6;
+ out7 = out1 ^ out5 ^ in3;
+ out3 = tmp0 ^ out7 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_C4(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in3 ^ in7;
+ out3 = tmp0 ^ in4;
+ tmp1 = tmp0 ^ in2;
+ out1 = tmp1 ^ in6;
+ out5 = tmp1 ^ in5;
+ out4 = out1 ^ out3 ^ in1;
+ out0 = out4 ^ in4 ^ in5;
+ out2 = out0 ^ out3 ^ in0;
+ out7 = out1 ^ out2 ^ in7;
+ out6 = tmp1 ^ out0 ^ out7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_C5(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out3 = in4 ^ in7;
+ tmp0 = in3 ^ in7;
+ out4 = in1 ^ in2 ^ in6;
+ out6 = in0 ^ in3 ^ in4;
+ out5 = tmp0 ^ in2;
+ out1 = tmp0 ^ out4;
+ out0 = out4 ^ in0 ^ in5;
+ out2 = out0 ^ out5 ^ in4;
+ out7 = tmp0 ^ out2 ^ in6;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_C6(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in5 ^ in6;
+ tmp1 = in1 ^ in7;
+ tmp2 = tmp0 ^ in0;
+ tmp3 = tmp0 ^ tmp1;
+ tmp4 = tmp2 ^ in4;
+ out0 = tmp3 ^ in2;
+ out6 = tmp4 ^ in3;
+ out2 = out6 ^ in2;
+ out7 = tmp1 ^ tmp4;
+ out3 = tmp2 ^ out2;
+ tmp5 = out3 ^ in5;
+ out5 = tmp5 ^ in7;
+ out4 = tmp3 ^ tmp5;
+ out1 = tmp4 ^ out5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_C7(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out3 = in2 ^ in4;
+ tmp0 = in3 ^ in5;
+ tmp1 = out3 ^ in7;
+ out6 = tmp0 ^ in0 ^ in4;
+ out5 = tmp1 ^ in3;
+ out2 = out6 ^ in6;
+ out7 = out2 ^ in1 ^ in3;
+ out0 = tmp1 ^ out7;
+ out1 = tmp0 ^ out0;
+ out4 = out1 ^ in0;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_C8(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out0 = in1 ^ in2;
+ out1 = in2 ^ in3;
+ tmp0 = in5 ^ in6;
+ tmp1 = in0 ^ in7;
+ out2 = out1 ^ in1 ^ in4;
+ out4 = tmp0 ^ in4;
+ out5 = tmp0 ^ in7;
+ out6 = tmp1 ^ in6;
+ out7 = tmp1 ^ in1;
+ out3 = out2 ^ in0 ^ in2 ^ in5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_C9(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out4 = in5 ^ in6;
+ out7 = in0 ^ in1;
+ tmp0 = in1 ^ in3;
+ out5 = in6 ^ in7;
+ out6 = in0 ^ in7;
+ out0 = out7 ^ in2;
+ out3 = out7 ^ in4 ^ in5;
+ out1 = tmp0 ^ in2;
+ out2 = tmp0 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_CA(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in7;
+ tmp1 = in2 ^ in7;
+ tmp2 = tmp0 ^ in6;
+ out0 = tmp1 ^ in1;
+ tmp3 = tmp1 ^ in3;
+ out6 = tmp2 ^ in5;
+ out7 = tmp2 ^ in1;
+ out2 = tmp3 ^ in4;
+ out5 = out6 ^ in0 ^ in4;
+ out4 = out5 ^ in3;
+ out1 = tmp0 ^ tmp3;
+ out3 = tmp3 ^ out5 ^ out7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_CB(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in4 ^ in7;
+ tmp1 = in5 ^ in7;
+ out7 = in0 ^ in1 ^ in6;
+ out5 = tmp0 ^ in6;
+ out2 = tmp0 ^ in3;
+ out6 = tmp1 ^ in0;
+ out4 = tmp1 ^ in3 ^ in6;
+ tmp2 = out5 ^ out7 ^ in2;
+ out1 = tmp2 ^ out2;
+ out0 = tmp2 ^ in4;
+ out3 = tmp2 ^ in5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_CC(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in3 ^ in5;
+ tmp1 = in1 ^ in6;
+ out1 = in2 ^ in3 ^ in7;
+ out5 = tmp0 ^ in6;
+ out0 = tmp1 ^ in2;
+ tmp2 = out5 ^ in0 ^ in7;
+ out3 = tmp2 ^ in4;
+ out6 = tmp0 ^ out3;
+ out7 = tmp1 ^ tmp2 ^ in3;
+ tmp3 = out1 ^ out6;
+ out4 = tmp2 ^ tmp3;
+ out2 = tmp3 ^ in1;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_CD(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out5 = in3 ^ in6;
+ tmp0 = in0 ^ in1;
+ tmp1 = in2 ^ in7;
+ out6 = in0 ^ in4 ^ in7;
+ out2 = tmp0 ^ out5 ^ in4;
+ out7 = tmp0 ^ in5;
+ out0 = tmp0 ^ in2 ^ in6;
+ out4 = tmp1 ^ in5;
+ out1 = tmp1 ^ in1 ^ in3;
+ out3 = out6 ^ in5 ^ in6;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_CE(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in2 ^ in5;
+ tmp1 = tmp0 ^ in3;
+ out4 = tmp1 ^ in4;
+ tmp2 = out4 ^ in6;
+ out3 = tmp2 ^ in0;
+ out5 = tmp2 ^ in2;
+ out2 = out3 ^ in5 ^ in7;
+ out6 = tmp1 ^ out2;
+ out7 = out2 ^ out4 ^ in1;
+ out1 = tmp2 ^ out6;
+ out0 = tmp0 ^ out7 ^ in0;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_CF(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in3 ^ in6;
+ tmp1 = in0 ^ in1 ^ in5;
+ out4 = in2 ^ in3 ^ in5;
+ out5 = tmp0 ^ in4;
+ out7 = tmp1 ^ in6;
+ out1 = tmp1 ^ out4 ^ in7;
+ tmp2 = out5 ^ in0;
+ out2 = tmp2 ^ in7;
+ out3 = tmp2 ^ out4;
+ out6 = tmp0 ^ out2 ^ in5;
+ out0 = tmp0 ^ out1;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_D0(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in3;
+ tmp1 = in1 ^ in4;
+ tmp2 = in2 ^ in5;
+ out7 = tmp0 ^ tmp1;
+ out0 = tmp1 ^ tmp2;
+ tmp3 = tmp2 ^ in3;
+ out1 = tmp3 ^ in6;
+ tmp4 = out1 ^ in1;
+ out2 = tmp4 ^ in7;
+ out3 = out2 ^ in2;
+ out4 = tmp0 ^ out3;
+ out5 = tmp3 ^ out3;
+ out6 = tmp4 ^ out4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_D1(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in3 ^ in5 ^ in6;
+ tmp1 = tmp0 ^ in1;
+ out1 = tmp1 ^ in2;
+ out2 = tmp1 ^ in7;
+ out3 = out2 ^ in3;
+ out5 = out3 ^ in2;
+ tmp2 = out3 ^ in0;
+ out4 = tmp2 ^ in4;
+ out7 = tmp0 ^ out4;
+ out6 = tmp2 ^ out1 ^ in6;
+ out0 = out2 ^ out6 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_D2(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in5 ^ in6;
+ out2 = tmp0 ^ in2 ^ in3;
+ out1 = out2 ^ in0;
+ out3 = out2 ^ in1;
+ out4 = out1 ^ in1 ^ in2;
+ out6 = out1 ^ in6 ^ in7;
+ out7 = out4 ^ in4 ^ in5;
+ out5 = out4 ^ out6 ^ in4;
+ out0 = tmp0 ^ out5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_D3(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in3 ^ in5 ^ in6;
+ tmp0 = out2 ^ in2;
+ tmp1 = tmp0 ^ in1;
+ out1 = tmp1 ^ in0;
+ out3 = tmp1 ^ in3;
+ out4 = out1 ^ in2 ^ in4;
+ tmp2 = out4 ^ in5;
+ out7 = tmp2 ^ in7;
+ out0 = tmp0 ^ out7;
+ tmp3 = out0 ^ in0;
+ out5 = tmp3 ^ in6;
+ out6 = tmp2 ^ tmp3;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_D4(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out3 = in3 ^ in5;
+ tmp0 = in1 ^ in5;
+ tmp1 = tmp0 ^ in2;
+ out4 = tmp1 ^ in0;
+ tmp2 = tmp1 ^ in6;
+ out2 = out4 ^ in3 ^ in7;
+ out0 = tmp2 ^ in4;
+ out5 = tmp2 ^ out3;
+ out1 = tmp0 ^ out5 ^ in7;
+ out6 = tmp0 ^ out2 ^ in4;
+ out7 = tmp1 ^ out6 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_D5(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out3 = in5;
+ tmp0 = in0 ^ in4;
+ tmp1 = tmp0 ^ in1 ^ in5;
+ out4 = tmp1 ^ in2;
+ out0 = out4 ^ in6;
+ tmp2 = tmp0 ^ out0;
+ out5 = tmp2 ^ in3;
+ out1 = out5 ^ in7;
+ out6 = tmp1 ^ out1;
+ out7 = tmp2 ^ out6;
+ out2 = out7 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_D6(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in1 ^ in2 ^ in4 ^ in6;
+ out5 = tmp0 ^ in3;
+ out0 = tmp0 ^ in5 ^ in7;
+ out3 = out0 ^ out5 ^ in2;
+ tmp1 = out3 ^ in0;
+ out1 = tmp1 ^ in6;
+ out2 = tmp1 ^ in7;
+ out4 = tmp1 ^ in1;
+ out6 = tmp1 ^ in4;
+ out7 = tmp0 ^ out2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_D7(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in3;
+ out3 = in2 ^ in5 ^ in7;
+ out2 = tmp0 ^ in5;
+ tmp1 = tmp0 ^ out3 ^ in1;
+ out1 = tmp1 ^ in6;
+ out4 = tmp1 ^ in4;
+ tmp2 = out1 ^ in4;
+ out6 = tmp2 ^ in1;
+ out7 = tmp2 ^ in2;
+ out0 = tmp2 ^ in3;
+ out5 = tmp2 ^ in0 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_D8(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out4 = in0;
+ out5 = in1;
+ tmp0 = in1 ^ in2;
+ out6 = in0 ^ in2;
+ out0 = tmp0 ^ in4;
+ tmp1 = tmp0 ^ in3;
+ out7 = tmp1 ^ out6;
+ out2 = tmp1 ^ in6;
+ out3 = out7 ^ in7;
+ out1 = tmp1 ^ in1 ^ in5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_D9(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out4 = in0 ^ in4;
+ out5 = in1 ^ in5;
+ out2 = in1 ^ in3 ^ in6;
+ out3 = in0 ^ in1 ^ in7;
+ out6 = in0 ^ in2 ^ in6;
+ out0 = out4 ^ in1 ^ in2;
+ out1 = out5 ^ in2 ^ in3;
+ out7 = out3 ^ in3;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_DA(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out5 = in1 ^ in4;
+ tmp0 = in2 ^ in7;
+ tmp1 = in0 ^ in2 ^ in3;
+ out0 = tmp0 ^ out5;
+ out4 = tmp0 ^ tmp1;
+ out2 = tmp0 ^ in3 ^ in6;
+ out1 = tmp1 ^ in5;
+ out3 = tmp1 ^ in1;
+ out6 = out1 ^ in3;
+ out7 = out3 ^ in2 ^ in6;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_DB(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in1;
+ tmp1 = in1 ^ in5;
+ tmp2 = in3 ^ in7;
+ out3 = tmp0 ^ in2;
+ out5 = tmp1 ^ in4;
+ out6 = tmp1 ^ out3 ^ in6;
+ out2 = tmp2 ^ in6;
+ tmp3 = tmp2 ^ in4;
+ tmp4 = out3 ^ in3;
+ out4 = tmp3 ^ in0;
+ out1 = tmp4 ^ in5;
+ out0 = tmp3 ^ tmp4;
+ out7 = tmp0 ^ out2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_DC(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in2;
+ tmp1 = in0 ^ in3;
+ out6 = tmp0 ^ in4;
+ tmp2 = tmp0 ^ in7;
+ out3 = tmp1 ^ in6;
+ tmp3 = tmp1 ^ in1;
+ out1 = tmp1 ^ tmp2 ^ in5;
+ out4 = tmp2 ^ in6;
+ out2 = tmp3 ^ in2;
+ out7 = tmp3 ^ in5;
+ out5 = tmp2 ^ out2;
+ out0 = out2 ^ out3 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_DD(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out3 = in0 ^ in6;
+ out2 = in0 ^ in1 ^ in3;
+ out6 = out3 ^ in2 ^ in4;
+ out7 = out2 ^ in5 ^ in7;
+ out0 = out6 ^ in1;
+ out4 = out6 ^ in7;
+ out5 = out7 ^ in0;
+ out1 = out5 ^ in2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_DE(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in2 ^ in3 ^ in6;
+ tmp1 = in3 ^ in4 ^ in7;
+ out4 = tmp0 ^ in0;
+ out5 = tmp1 ^ in1;
+ out3 = out4 ^ in7;
+ out2 = out3 ^ in6;
+ out1 = out2 ^ in5;
+ out6 = tmp1 ^ out1;
+ out0 = tmp0 ^ out5;
+ out7 = out0 ^ out1 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_DF(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in0 ^ in3 ^ in7;
+ tmp0 = out2 ^ in1 ^ in5;
+ out1 = tmp0 ^ in2;
+ out7 = tmp0 ^ in6;
+ out5 = tmp0 ^ in0 ^ in4;
+ tmp1 = out1 ^ out5 ^ in6;
+ out4 = tmp1 ^ in3;
+ out6 = tmp1 ^ in5;
+ tmp2 = tmp1 ^ in7;
+ out0 = tmp2 ^ in1;
+ out3 = tmp2 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_E0(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out3 = in1 ^ in7;
+ tmp0 = in2 ^ in4;
+ out4 = out3 ^ in3 ^ in5;
+ out2 = tmp0 ^ in1;
+ tmp1 = tmp0 ^ in6;
+ out0 = out4 ^ in2;
+ out6 = out4 ^ in0;
+ out1 = tmp1 ^ in3;
+ out5 = tmp1 ^ in0;
+ out7 = out5 ^ in1;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_E1(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in1 ^ in4;
+ tmp0 = in1 ^ in7;
+ out3 = tmp0 ^ in3;
+ tmp1 = out3 ^ in5;
+ out4 = tmp1 ^ in4;
+ tmp2 = tmp1 ^ in0;
+ out0 = tmp2 ^ in2;
+ out6 = tmp2 ^ in6;
+ tmp3 = out0 ^ out4 ^ in6;
+ out5 = tmp3 ^ in5;
+ out7 = tmp0 ^ tmp3;
+ out1 = tmp2 ^ out5 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_E2(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out3 = in1 ^ in2;
+ out4 = in1 ^ in5;
+ out2 = in2 ^ in4 ^ in7;
+ out5 = in0 ^ in2 ^ in6;
+ out0 = out3 ^ in3 ^ in5;
+ out7 = out3 ^ in0 ^ in4;
+ out6 = out2 ^ out7 ^ in3;
+ out1 = out5 ^ in3 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_E3(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in4 ^ in7;
+ tmp0 = in1 ^ in3;
+ out3 = tmp0 ^ in2;
+ tmp1 = out3 ^ in0;
+ out0 = tmp1 ^ in5;
+ tmp2 = tmp1 ^ in4;
+ out1 = tmp2 ^ in6;
+ tmp3 = tmp2 ^ in3;
+ out7 = tmp3 ^ in7;
+ out6 = out1 ^ out2 ^ in2;
+ tmp4 = tmp0 ^ out0;
+ out5 = tmp4 ^ in6;
+ out4 = tmp3 ^ tmp4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_E4(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out3 = in6;
+ tmp0 = in0 ^ in4;
+ tmp1 = tmp0 ^ in2 ^ in6;
+ out2 = tmp1 ^ in1;
+ out7 = out2 ^ in5;
+ tmp2 = tmp0 ^ out7;
+ out4 = tmp2 ^ in3;
+ out0 = out4 ^ in7;
+ out6 = tmp1 ^ out0;
+ out5 = tmp2 ^ out6;
+ out1 = out5 ^ in0;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_E5(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out3 = in3 ^ in6;
+ tmp0 = in0 ^ in1;
+ tmp1 = in5 ^ in7;
+ out2 = tmp0 ^ in4 ^ in6;
+ tmp2 = tmp1 ^ out2;
+ out6 = tmp2 ^ in3;
+ out7 = tmp2 ^ in2;
+ out0 = out6 ^ in2 ^ in4;
+ out5 = out6 ^ in1 ^ in2;
+ out1 = tmp0 ^ out5 ^ in5;
+ out4 = tmp1 ^ out1;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_E6(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out3 = in2 ^ in6 ^ in7;
+ out2 = out3 ^ in0 ^ in4;
+ out4 = out3 ^ in1 ^ in5;
+ out1 = out2 ^ in3;
+ out7 = out2 ^ out4 ^ in2;
+ out0 = out4 ^ in3 ^ in7;
+ out5 = out1 ^ in4;
+ out6 = out0 ^ out2 ^ in5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_E7(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in2 ^ in3;
+ out3 = tmp0 ^ in6 ^ in7;
+ tmp1 = out3 ^ in0;
+ out5 = tmp1 ^ in5;
+ tmp2 = tmp1 ^ in4;
+ tmp3 = out5 ^ in7;
+ out1 = tmp2 ^ in1;
+ out0 = tmp3 ^ in1;
+ out6 = out1 ^ in2;
+ out2 = tmp0 ^ tmp2;
+ tmp4 = tmp3 ^ out6;
+ out4 = tmp4 ^ in6;
+ out7 = tmp4 ^ in0;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_E8(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out4 = in3 ^ in6;
+ tmp0 = in4 ^ in7;
+ out1 = in2 ^ in3 ^ in4;
+ out5 = tmp0 ^ in0;
+ tmp1 = tmp0 ^ in1;
+ tmp2 = tmp1 ^ in5;
+ out0 = tmp1 ^ out1;
+ out2 = tmp2 ^ in2;
+ out6 = tmp2 ^ out5;
+ tmp3 = out6 ^ in6;
+ out3 = tmp3 ^ in7;
+ out7 = tmp3 ^ in2 ^ in5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_E9(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in1;
+ tmp1 = in3 ^ in6;
+ tmp2 = tmp0 ^ in6;
+ out4 = tmp1 ^ in4;
+ out6 = tmp2 ^ in5;
+ out7 = tmp2 ^ in2 ^ in7;
+ out3 = out6 ^ in3 ^ in7;
+ out0 = tmp1 ^ out7;
+ out2 = out3 ^ out4 ^ in0;
+ out5 = tmp0 ^ out2;
+ out1 = out0 ^ out5 ^ in5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_EA(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out4 = in6 ^ in7;
+ out5 = in0 ^ in7;
+ out6 = in0 ^ in1;
+ out0 = in1 ^ in2 ^ in3;
+ out2 = in2 ^ in4 ^ in5;
+ out7 = out6 ^ in2;
+ out1 = out0 ^ out6 ^ in4;
+ out3 = out7 ^ in5 ^ in6;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_EB(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in4 ^ in5;
+ tmp0 = in0 ^ in1;
+ out4 = in4 ^ in6 ^ in7;
+ out5 = in0 ^ in5 ^ in7;
+ out6 = tmp0 ^ in6;
+ tmp1 = tmp0 ^ in2;
+ out0 = tmp1 ^ in3;
+ out7 = tmp1 ^ in7;
+ out1 = out0 ^ in4;
+ out3 = out0 ^ in5 ^ in6;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_EC(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out3 = in0 ^ in5;
+ out4 = in2 ^ in3 ^ in7;
+ out5 = in0 ^ in3 ^ in4;
+ out6 = out3 ^ in1 ^ in4;
+ out1 = out4 ^ in4;
+ out0 = out4 ^ in1 ^ in6;
+ out2 = out0 ^ out5 ^ in5;
+ out7 = out2 ^ in4 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_ED(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in2 ^ in4;
+ tmp1 = in3 ^ in5;
+ out4 = tmp0 ^ in3 ^ in7;
+ out3 = tmp1 ^ in0;
+ out1 = out4 ^ in1;
+ out5 = out3 ^ in4;
+ out7 = out1 ^ out5 ^ in6;
+ out2 = tmp0 ^ out7;
+ out0 = tmp1 ^ out7;
+ out6 = out2 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_EE(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out4 = in2;
+ tmp0 = in0 ^ in1;
+ out5 = in0 ^ in3;
+ tmp1 = tmp0 ^ in2;
+ out6 = tmp0 ^ in4;
+ tmp2 = tmp1 ^ out5;
+ out7 = tmp1 ^ in5;
+ out1 = tmp2 ^ out6 ^ in7;
+ out0 = tmp2 ^ in6;
+ tmp3 = out7 ^ in1;
+ out3 = tmp3 ^ in7;
+ out2 = tmp3 ^ in4 ^ in6;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_EF(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out4 = in2 ^ in4;
+ tmp0 = in0 ^ in5;
+ tmp1 = in4 ^ in6;
+ out5 = tmp0 ^ in3;
+ out2 = tmp0 ^ tmp1;
+ out6 = tmp1 ^ in0 ^ in1;
+ out3 = out5 ^ in2 ^ in7;
+ out7 = out3 ^ in1 ^ in3;
+ out0 = out4 ^ out6 ^ in3;
+ out1 = tmp1 ^ out0 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_F0(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in1 ^ in2;
+ tmp1 = in4 ^ in5;
+ out2 = tmp0 ^ in6;
+ out3 = tmp1 ^ in1;
+ tmp2 = tmp1 ^ in7;
+ out1 = out2 ^ out3 ^ in3;
+ tmp3 = tmp0 ^ tmp2;
+ out0 = tmp3 ^ in3;
+ out5 = tmp3 ^ in0;
+ out4 = out1 ^ out5 ^ in4;
+ out7 = out4 ^ in2;
+ out6 = tmp2 ^ out7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_F1(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in1 ^ in6;
+ tmp0 = in3 ^ in5;
+ out3 = tmp0 ^ in1 ^ in4;
+ tmp1 = out3 ^ in2;
+ out1 = tmp1 ^ in6;
+ tmp2 = tmp1 ^ in0;
+ tmp3 = out1 ^ in5;
+ out0 = tmp2 ^ in7;
+ out6 = tmp2 ^ in4;
+ out7 = tmp3 ^ in0;
+ out5 = tmp0 ^ out0;
+ out4 = tmp3 ^ out5 ^ in1;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_F2(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in4 ^ in5;
+ out2 = in2 ^ in6 ^ in7;
+ tmp1 = tmp0 ^ in1;
+ tmp2 = tmp1 ^ in2;
+ out0 = tmp2 ^ in3;
+ out3 = tmp2 ^ in7;
+ out5 = out3 ^ in0 ^ in4;
+ tmp3 = tmp0 ^ out5;
+ out7 = tmp3 ^ in3;
+ out4 = tmp3 ^ out2;
+ out1 = out0 ^ out4 ^ in4;
+ out6 = tmp1 ^ out1;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_F3(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in6 ^ in7;
+ tmp0 = in0 ^ in1;
+ out4 = tmp0 ^ in6;
+ tmp1 = tmp0 ^ in2;
+ out5 = tmp1 ^ in7;
+ out6 = tmp1 ^ in3;
+ out7 = out6 ^ in4;
+ out0 = out7 ^ in5;
+ out1 = out0 ^ in6;
+ out3 = out0 ^ in0 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_F4(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in0 ^ in1 ^ in2;
+ tmp0 = out2 ^ in3;
+ out4 = tmp0 ^ in4;
+ out5 = out4 ^ in5;
+ out6 = out5 ^ in6;
+ out7 = out6 ^ in7;
+ out0 = out7 ^ in0;
+ out1 = out0 ^ in1;
+ out3 = tmp0 ^ out7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_F5(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in0 ^ in1;
+ tmp0 = out2 ^ in2;
+ out4 = tmp0 ^ in3;
+ out5 = out4 ^ in4;
+ out6 = out5 ^ in5;
+ out7 = out6 ^ in6;
+ out0 = out7 ^ in7;
+ out1 = out0 ^ in0;
+ out3 = tmp0 ^ out0;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_F6(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in7;
+ out2 = tmp0 ^ in2;
+ out4 = out2 ^ in1 ^ in4;
+ out7 = out4 ^ in3 ^ in5;
+ out5 = out7 ^ in4 ^ in7;
+ out0 = tmp0 ^ out7 ^ in6;
+ tmp1 = out0 ^ in1;
+ out6 = out0 ^ in0 ^ in5;
+ out3 = tmp1 ^ in3;
+ out1 = tmp0 ^ tmp1;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_F7(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in0 ^ in7;
+ tmp0 = out2 ^ in1;
+ out4 = tmp0 ^ in2;
+ out5 = out4 ^ in3 ^ in7;
+ out6 = out5 ^ in4;
+ out7 = out6 ^ in5;
+ out0 = out7 ^ in6;
+ out1 = out0 ^ in7;
+ out3 = tmp0 ^ out1;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_F8(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in4;
+ tmp1 = in3 ^ in5;
+ tmp2 = tmp0 ^ in6;
+ out4 = tmp0 ^ tmp1;
+ out1 = tmp1 ^ in2 ^ in4;
+ out3 = tmp2 ^ in1;
+ out5 = out3 ^ in5;
+ out7 = out1 ^ out5 ^ in7;
+ out6 = tmp1 ^ out7;
+ out0 = tmp2 ^ out7;
+ out2 = out6 ^ in0;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_F9(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in3 ^ in5;
+ tmp1 = in0 ^ in6;
+ out4 = tmp0 ^ in0;
+ tmp2 = tmp1 ^ in4;
+ tmp3 = tmp1 ^ in2;
+ out5 = tmp2 ^ in1;
+ out3 = out5 ^ in3;
+ tmp4 = tmp3 ^ out3;
+ out1 = tmp4 ^ in5;
+ out0 = tmp4 ^ in0 ^ in7;
+ out6 = tmp0 ^ out0 ^ in4;
+ out7 = tmp2 ^ tmp4;
+ out2 = tmp3 ^ out6;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_FA(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in1;
+ tmp1 = tmp0 ^ in2;
+ tmp2 = tmp0 ^ in5;
+ tmp3 = tmp1 ^ in7;
+ out5 = tmp2 ^ in6;
+ out6 = tmp3 ^ in6;
+ out7 = tmp3 ^ in3;
+ out3 = out6 ^ in4;
+ out2 = tmp1 ^ out5;
+ out4 = out2 ^ out3 ^ in1;
+ out0 = out4 ^ out7 ^ in5;
+ out1 = tmp2 ^ out0;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_FB(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in5 ^ in6;
+ tmp0 = in0 ^ in1;
+ out4 = in0 ^ in5 ^ in7;
+ out5 = tmp0 ^ in6;
+ tmp1 = tmp0 ^ in2;
+ out6 = tmp1 ^ in7;
+ out7 = tmp1 ^ in3;
+ out0 = out7 ^ in4;
+ out1 = out0 ^ in5;
+ out3 = out0 ^ in6 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_FC(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in1 ^ in2;
+ tmp1 = in0 ^ in7;
+ out2 = tmp0 ^ tmp1 ^ in5;
+ out3 = tmp1 ^ in4;
+ tmp2 = out2 ^ in6;
+ out6 = tmp2 ^ in4;
+ out7 = tmp2 ^ in3;
+ out4 = out6 ^ in1 ^ in3;
+ tmp3 = out4 ^ in0;
+ out1 = tmp3 ^ in6;
+ out0 = tmp3 ^ in1 ^ in5;
+ out5 = tmp0 ^ out4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_FD(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in5;
+ tmp1 = in1 ^ in7;
+ out2 = tmp0 ^ tmp1;
+ out6 = out2 ^ in2 ^ in4;
+ tmp2 = out6 ^ in0;
+ out1 = tmp2 ^ in3;
+ out0 = tmp0 ^ out1 ^ in6;
+ out5 = out0 ^ in2;
+ tmp3 = out5 ^ in1;
+ out3 = tmp3 ^ in6;
+ out7 = tmp2 ^ tmp3;
+ out4 = tmp1 ^ out7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_FE(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in2;
+ out2 = tmp0 ^ in5;
+ out3 = tmp0 ^ in4;
+ tmp1 = out3 ^ in6;
+ out4 = tmp1 ^ in5;
+ tmp2 = tmp1 ^ in1;
+ out6 = tmp2 ^ in7;
+ tmp3 = tmp2 ^ in0;
+ out0 = tmp3 ^ in3;
+ tmp4 = out0 ^ out4 ^ in7;
+ out5 = tmp4 ^ in6;
+ out7 = tmp4 ^ in2;
+ out1 = tmp3 ^ out5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_FF(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in0 ^ in5;
+ tmp0 = in4 ^ in7;
+ tmp1 = out2 ^ in2;
+ out4 = tmp1 ^ in6;
+ out7 = tmp1 ^ in1 ^ in3;
+ out1 = tmp0 ^ out7;
+ tmp2 = out1 ^ in5;
+ out6 = tmp2 ^ in3;
+ tmp3 = tmp2 ^ in7;
+ out0 = tmp3 ^ in6;
+ out3 = tmp3 ^ in1;
+ out5 = tmp0 ^ out0 ^ in2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void (*gf8_muladd[])(void *out, void *in) = {
+ gf8_muladd_00, gf8_muladd_01, gf8_muladd_02, gf8_muladd_03, gf8_muladd_04,
+ gf8_muladd_05, gf8_muladd_06, gf8_muladd_07, gf8_muladd_08, gf8_muladd_09,
+ gf8_muladd_0A, gf8_muladd_0B, gf8_muladd_0C, gf8_muladd_0D, gf8_muladd_0E,
+ gf8_muladd_0F, gf8_muladd_10, gf8_muladd_11, gf8_muladd_12, gf8_muladd_13,
+ gf8_muladd_14, gf8_muladd_15, gf8_muladd_16, gf8_muladd_17, gf8_muladd_18,
+ gf8_muladd_19, gf8_muladd_1A, gf8_muladd_1B, gf8_muladd_1C, gf8_muladd_1D,
+ gf8_muladd_1E, gf8_muladd_1F, gf8_muladd_20, gf8_muladd_21, gf8_muladd_22,
+ gf8_muladd_23, gf8_muladd_24, gf8_muladd_25, gf8_muladd_26, gf8_muladd_27,
+ gf8_muladd_28, gf8_muladd_29, gf8_muladd_2A, gf8_muladd_2B, gf8_muladd_2C,
+ gf8_muladd_2D, gf8_muladd_2E, gf8_muladd_2F, gf8_muladd_30, gf8_muladd_31,
+ gf8_muladd_32, gf8_muladd_33, gf8_muladd_34, gf8_muladd_35, gf8_muladd_36,
+ gf8_muladd_37, gf8_muladd_38, gf8_muladd_39, gf8_muladd_3A, gf8_muladd_3B,
+ gf8_muladd_3C, gf8_muladd_3D, gf8_muladd_3E, gf8_muladd_3F, gf8_muladd_40,
+ gf8_muladd_41, gf8_muladd_42, gf8_muladd_43, gf8_muladd_44, gf8_muladd_45,
+ gf8_muladd_46, gf8_muladd_47, gf8_muladd_48, gf8_muladd_49, gf8_muladd_4A,
+ gf8_muladd_4B, gf8_muladd_4C, gf8_muladd_4D, gf8_muladd_4E, gf8_muladd_4F,
+ gf8_muladd_50, gf8_muladd_51, gf8_muladd_52, gf8_muladd_53, gf8_muladd_54,
+ gf8_muladd_55, gf8_muladd_56, gf8_muladd_57, gf8_muladd_58, gf8_muladd_59,
+ gf8_muladd_5A, gf8_muladd_5B, gf8_muladd_5C, gf8_muladd_5D, gf8_muladd_5E,
+ gf8_muladd_5F, gf8_muladd_60, gf8_muladd_61, gf8_muladd_62, gf8_muladd_63,
+ gf8_muladd_64, gf8_muladd_65, gf8_muladd_66, gf8_muladd_67, gf8_muladd_68,
+ gf8_muladd_69, gf8_muladd_6A, gf8_muladd_6B, gf8_muladd_6C, gf8_muladd_6D,
+ gf8_muladd_6E, gf8_muladd_6F, gf8_muladd_70, gf8_muladd_71, gf8_muladd_72,
+ gf8_muladd_73, gf8_muladd_74, gf8_muladd_75, gf8_muladd_76, gf8_muladd_77,
+ gf8_muladd_78, gf8_muladd_79, gf8_muladd_7A, gf8_muladd_7B, gf8_muladd_7C,
+ gf8_muladd_7D, gf8_muladd_7E, gf8_muladd_7F, gf8_muladd_80, gf8_muladd_81,
+ gf8_muladd_82, gf8_muladd_83, gf8_muladd_84, gf8_muladd_85, gf8_muladd_86,
+ gf8_muladd_87, gf8_muladd_88, gf8_muladd_89, gf8_muladd_8A, gf8_muladd_8B,
+ gf8_muladd_8C, gf8_muladd_8D, gf8_muladd_8E, gf8_muladd_8F, gf8_muladd_90,
+ gf8_muladd_91, gf8_muladd_92, gf8_muladd_93, gf8_muladd_94, gf8_muladd_95,
+ gf8_muladd_96, gf8_muladd_97, gf8_muladd_98, gf8_muladd_99, gf8_muladd_9A,
+ gf8_muladd_9B, gf8_muladd_9C, gf8_muladd_9D, gf8_muladd_9E, gf8_muladd_9F,
+ gf8_muladd_A0, gf8_muladd_A1, gf8_muladd_A2, gf8_muladd_A3, gf8_muladd_A4,
+ gf8_muladd_A5, gf8_muladd_A6, gf8_muladd_A7, gf8_muladd_A8, gf8_muladd_A9,
+ gf8_muladd_AA, gf8_muladd_AB, gf8_muladd_AC, gf8_muladd_AD, gf8_muladd_AE,
+ gf8_muladd_AF, gf8_muladd_B0, gf8_muladd_B1, gf8_muladd_B2, gf8_muladd_B3,
+ gf8_muladd_B4, gf8_muladd_B5, gf8_muladd_B6, gf8_muladd_B7, gf8_muladd_B8,
+ gf8_muladd_B9, gf8_muladd_BA, gf8_muladd_BB, gf8_muladd_BC, gf8_muladd_BD,
+ gf8_muladd_BE, gf8_muladd_BF, gf8_muladd_C0, gf8_muladd_C1, gf8_muladd_C2,
+ gf8_muladd_C3, gf8_muladd_C4, gf8_muladd_C5, gf8_muladd_C6, gf8_muladd_C7,
+ gf8_muladd_C8, gf8_muladd_C9, gf8_muladd_CA, gf8_muladd_CB, gf8_muladd_CC,
+ gf8_muladd_CD, gf8_muladd_CE, gf8_muladd_CF, gf8_muladd_D0, gf8_muladd_D1,
+ gf8_muladd_D2, gf8_muladd_D3, gf8_muladd_D4, gf8_muladd_D5, gf8_muladd_D6,
+ gf8_muladd_D7, gf8_muladd_D8, gf8_muladd_D9, gf8_muladd_DA, gf8_muladd_DB,
+ gf8_muladd_DC, gf8_muladd_DD, gf8_muladd_DE, gf8_muladd_DF, gf8_muladd_E0,
+ gf8_muladd_E1, gf8_muladd_E2, gf8_muladd_E3, gf8_muladd_E4, gf8_muladd_E5,
+ gf8_muladd_E6, gf8_muladd_E7, gf8_muladd_E8, gf8_muladd_E9, gf8_muladd_EA,
+ gf8_muladd_EB, gf8_muladd_EC, gf8_muladd_ED, gf8_muladd_EE, gf8_muladd_EF,
+ gf8_muladd_F0, gf8_muladd_F1, gf8_muladd_F2, gf8_muladd_F3, gf8_muladd_F4,
+ gf8_muladd_F5, gf8_muladd_F6, gf8_muladd_F7, gf8_muladd_F8, gf8_muladd_F9,
+ gf8_muladd_FA, gf8_muladd_FB, gf8_muladd_FC, gf8_muladd_FD, gf8_muladd_FE,
+ gf8_muladd_FF};
+
+static uint64_t zero[EC_METHOD_WORD_SIZE * 8] = {
+ 0,
+};
+
+void
+ec_code_c_prepare(ec_gf_t *gf, uint32_t *values, uint32_t count)
+{
+ uint32_t i, last, tmp;
+
+ last = 1;
+ for (i = count; i > 0; i--) {
+ if (values[i - 1] != 0) {
+ tmp = values[i - 1];
+ values[i - 1] = ec_gf_div(gf, tmp, last);
+ last = tmp;
+ }
+ }
+}
+
+void
+ec_code_c_linear(void *dst, void *src, uint64_t offset, uint32_t *values,
+ uint32_t count)
+{
+ src += offset;
+ gf8_muladd_00(dst, src);
+ while (--count > 0) {
+ src += EC_METHOD_CHUNK_SIZE;
+ gf8_muladd[*values](dst, src);
+ values++;
+ }
+}
+
+void
+ec_code_c_interleaved(void *dst, void **src, uint64_t offset, uint32_t *values,
+ uint32_t count)
+{
+ uint32_t i, last, tmp;
+
+ i = 0;
+ while ((last = *values++) == 0) {
+ i++;
+ }
+ gf8_muladd_00(dst, src[i++] + offset);
+ while (i < count) {
+ tmp = *values++;
+ if (tmp != 0) {
+ gf8_muladd[last](dst, src[i] + offset);
+ last = tmp;
+ }
+ i++;
+ }
+ gf8_muladd[last](dst, zero);
+}
diff --git a/xlators/cluster/ec/src/ec-code-c.h b/xlators/cluster/ec/src/ec-code-c.h
new file mode 100644
index 00000000000..42b5a064eb8
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-code-c.h
@@ -0,0 +1,27 @@
+/*
+ Copyright (c) 2015 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __EC_CODE_C_H__
+#define __EC_CODE_C_H__
+
+#include "ec-types.h"
+
+void
+ec_code_c_prepare(ec_gf_t *gf, uint32_t *values, uint32_t count);
+
+void
+ec_code_c_linear(void *dst, void *src, uint64_t offset, uint32_t *values,
+ uint32_t count);
+
+void
+ec_code_c_interleaved(void *dst, void **src, uint64_t offset, uint32_t *values,
+ uint32_t count);
+
+#endif /* __EC_CODE_C_H__ */
diff --git a/xlators/cluster/ec/src/ec-code-intel.c b/xlators/cluster/ec/src/ec-code-intel.c
new file mode 100644
index 00000000000..f1c4e13e321
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-code-intel.c
@@ -0,0 +1,594 @@
+/*
+ Copyright (c) 2015 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <inttypes.h>
+#include <string.h>
+#include <errno.h>
+
+#include "ec-code-intel.h"
+
+static void
+ec_code_intel_init(ec_code_intel_t *intel)
+{
+ memset(intel, 0, sizeof(ec_code_intel_t));
+}
+
+static void
+ec_code_intel_prefix(ec_code_intel_t *intel, uint8_t prefix)
+{
+ intel->prefix.data[intel->prefix.bytes++] = prefix;
+}
+
+static void
+ec_code_intel_rex(ec_code_intel_t *intel, gf_boolean_t w)
+{
+ gf_boolean_t present = _gf_false;
+
+ if (w) {
+ intel->rex.w = 1;
+ present = _gf_true;
+ }
+ if (intel->modrm.present) {
+ if (intel->modrm.reg > 7) {
+ intel->modrm.reg &= 7;
+ intel->rex.r = 1;
+ present = _gf_true;
+ }
+ if (intel->sib.present) {
+ if (intel->sib.index > 7) {
+ intel->sib.index &= 7;
+ intel->rex.x = 1;
+ present = _gf_true;
+ }
+ if (intel->sib.base > 7) {
+ intel->sib.base &= 7;
+ intel->rex.b = 1;
+ present = _gf_true;
+ }
+ } else if (intel->modrm.rm > 7) {
+ intel->modrm.rm &= 7;
+ intel->rex.b = 1;
+ present = _gf_true;
+ }
+ } else if (intel->reg > 7) {
+ intel->reg &= 7;
+ intel->rex.b = 1;
+ present = _gf_true;
+ }
+ intel->rex.present = present;
+}
+
+static void
+ec_code_intel_vex(ec_code_intel_t *intel, gf_boolean_t w, gf_boolean_t l,
+ ec_code_vex_opcode_t opcode, ec_code_vex_prefix_t prefix,
+ uint32_t reg)
+{
+ ec_code_intel_rex(intel, w);
+ if (((intel->rex.w == 1) || (intel->rex.x == 0) || (intel->rex.b == 0)) ||
+ ((opcode != VEX_OPCODE_NONE) && (opcode != VEX_OPCODE_0F))) {
+ intel->rex.present = _gf_false;
+
+ intel->vex.bytes = 3;
+ intel->vex.data[0] = 0xC4;
+ intel->vex.data[1] = ((intel->rex.r << 7) | (intel->rex.x << 6) |
+ (intel->rex.b << 5) | opcode) ^
+ 0xE0;
+ intel->vex.data[2] = (intel->rex.w << 7) | ((~reg & 0x0F) << 3) |
+ (l ? 0x04 : 0x00) | prefix;
+ } else {
+ intel->vex.bytes = 2;
+ intel->vex.data[0] = 0xC5;
+ intel->vex.data[1] = (intel->rex.r << 7) | ((~reg & 0x0F) << 3) |
+ (l ? 0x04 : 0x00) | prefix;
+ }
+}
+
+static void
+ec_code_intel_modrm_reg(ec_code_intel_t *intel, uint32_t rm, uint32_t reg)
+{
+ intel->modrm.present = _gf_true;
+ intel->modrm.mod = 3;
+ intel->modrm.rm = rm;
+ intel->modrm.reg = reg;
+}
+
+static void
+ec_code_intel_modrm_mem(ec_code_intel_t *intel, uint32_t reg,
+ ec_code_intel_reg_t base, ec_code_intel_reg_t index,
+ uint32_t scale, int32_t offset)
+{
+ if (index == REG_SP) {
+ intel->invalid = _gf_true;
+ return;
+ }
+ if ((index != REG_NULL) && (scale != 1) && (scale != 2) && (scale != 4) &&
+ (scale != 8)) {
+ intel->invalid = _gf_true;
+ return;
+ }
+ scale >>= 1;
+ if (scale == 4) {
+ scale = 3;
+ }
+
+ intel->modrm.present = _gf_true;
+ intel->modrm.reg = reg;
+
+ intel->offset.value = offset;
+ if ((offset == 0) && (base != REG_BP)) {
+ intel->modrm.mod = 0;
+ intel->offset.bytes = 0;
+ } else if ((offset >= -128) && (offset <= 127)) {
+ intel->modrm.mod = 1;
+ intel->offset.bytes = 1;
+ } else {
+ intel->modrm.mod = 2;
+ intel->offset.bytes = 4;
+ }
+
+ intel->modrm.rm = base;
+ if ((index != REG_NULL) || (base == REG_SP)) {
+ intel->modrm.rm = 4;
+ intel->sib.present = _gf_true;
+ intel->sib.index = index;
+ if (index == REG_NULL) {
+ intel->sib.index = 4;
+ }
+ intel->sib.scale = scale;
+ intel->sib.base = base;
+ if (base == REG_NULL) {
+ intel->sib.base = 5;
+ intel->modrm.mod = 0;
+ intel->offset.bytes = 4;
+ }
+ } else if (base == REG_NULL) {
+ intel->modrm.mod = 0;
+ intel->modrm.rm = 5;
+ intel->offset.bytes = 4;
+ }
+}
+
+static void
+ec_code_intel_op_1(ec_code_intel_t *intel, uint8_t opcode, uint32_t reg)
+{
+ intel->reg = reg;
+ intel->opcode.bytes = 1;
+ intel->opcode.data[0] = opcode;
+}
+
+static void
+ec_code_intel_op_2(ec_code_intel_t *intel, uint8_t opcode1, uint8_t opcode2,
+ uint32_t reg)
+{
+ intel->reg = reg;
+ intel->opcode.bytes = 2;
+ intel->opcode.data[0] = opcode1;
+ intel->opcode.data[1] = opcode2;
+}
+
+static void
+ec_code_intel_immediate_1(ec_code_intel_t *intel, uint32_t value)
+{
+ intel->immediate.bytes = 1;
+ intel->immediate.value = value;
+}
+
+static void
+ec_code_intel_immediate_2(ec_code_intel_t *intel, uint32_t value)
+{
+ intel->immediate.bytes = 2;
+ intel->immediate.value = value;
+}
+
+static void
+ec_code_intel_immediate_4(ec_code_intel_t *intel, uint32_t value)
+{
+ intel->immediate.bytes = 4;
+ intel->immediate.value = value;
+}
+
+static void
+ec_code_intel_emit(ec_code_builder_t *builder, ec_code_intel_t *intel)
+{
+ uint8_t insn[15];
+ uint32_t i, count;
+
+ if (intel->invalid) {
+ ec_code_error(builder, EINVAL);
+ return;
+ }
+
+ count = 0;
+ for (i = 0; i < intel->prefix.bytes; i++) {
+ insn[count++] = intel->prefix.data[i];
+ }
+ for (i = 0; i < intel->vex.bytes; i++) {
+ insn[count++] = intel->vex.data[i];
+ }
+ if (intel->rex.present) {
+ insn[count++] = 0x40 | (intel->rex.w << 3) | (intel->rex.r << 2) |
+ (intel->rex.x << 1) | (intel->rex.b << 0);
+ }
+ for (i = 0; i < intel->opcode.bytes; i++) {
+ insn[count++] = intel->opcode.data[i];
+ }
+ if (intel->modrm.present) {
+ insn[count++] = (intel->modrm.mod << 6) | (intel->modrm.reg << 3) |
+ (intel->modrm.rm << 0);
+ if (intel->sib.present) {
+ insn[count++] = (intel->sib.scale << 6) | (intel->sib.index << 3) |
+ (intel->sib.base << 0);
+ }
+ }
+ for (i = 0; i < intel->offset.bytes; i++) {
+ insn[count++] = intel->offset.data[i];
+ }
+ for (i = 0; i < intel->immediate.bytes; i++) {
+ insn[count++] = intel->immediate.data[i];
+ }
+
+ ec_code_emit(builder, insn, count);
+}
+
+void
+ec_code_intel_op_push_r(ec_code_builder_t *builder, ec_code_intel_reg_t reg)
+{
+ ec_code_intel_t intel;
+
+ ec_code_intel_init(&intel);
+
+ ec_code_intel_op_1(&intel, 0x50 | (reg & 7), reg);
+ ec_code_intel_rex(&intel, _gf_false);
+
+ ec_code_intel_emit(builder, &intel);
+}
+
+void
+ec_code_intel_op_pop_r(ec_code_builder_t *builder, ec_code_intel_reg_t reg)
+{
+ ec_code_intel_t intel;
+
+ ec_code_intel_init(&intel);
+
+ ec_code_intel_op_1(&intel, 0x58 | (reg & 7), reg);
+ ec_code_intel_rex(&intel, _gf_false);
+
+ ec_code_intel_emit(builder, &intel);
+}
+
+void
+ec_code_intel_op_ret(ec_code_builder_t *builder, uint32_t size)
+{
+ ec_code_intel_t intel;
+
+ ec_code_intel_init(&intel);
+
+ if (size == 0) {
+ ec_code_intel_op_1(&intel, 0xC3, 0);
+ } else {
+ ec_code_intel_immediate_2(&intel, size);
+ ec_code_intel_op_1(&intel, 0xC2, 0);
+ }
+ ec_code_intel_rex(&intel, _gf_false);
+
+ ec_code_intel_emit(builder, &intel);
+}
+
+void
+ec_code_intel_op_mov_r2r(ec_code_builder_t *builder, ec_code_intel_reg_t src,
+ ec_code_intel_reg_t dst)
+{
+ ec_code_intel_t intel;
+
+ ec_code_intel_init(&intel);
+
+ ec_code_intel_modrm_reg(&intel, dst, src);
+ ec_code_intel_op_1(&intel, 0x89, 0);
+ ec_code_intel_rex(&intel, _gf_true);
+
+ ec_code_intel_emit(builder, &intel);
+}
+
+void
+ec_code_intel_op_mov_r2m(ec_code_builder_t *builder, ec_code_intel_reg_t src,
+ ec_code_intel_reg_t base, ec_code_intel_reg_t index,
+ uint32_t scale, int32_t offset)
+{
+ ec_code_intel_t intel;
+
+ ec_code_intel_init(&intel);
+
+ ec_code_intel_modrm_mem(&intel, src, base, index, scale, offset);
+ ec_code_intel_op_1(&intel, 0x89, 0);
+ ec_code_intel_rex(&intel, _gf_true);
+
+ ec_code_intel_emit(builder, &intel);
+}
+
+void
+ec_code_intel_op_mov_m2r(ec_code_builder_t *builder, ec_code_intel_reg_t base,
+ ec_code_intel_reg_t index, uint32_t scale,
+ int32_t offset, ec_code_intel_reg_t dst)
+{
+ ec_code_intel_t intel;
+
+ ec_code_intel_init(&intel);
+
+ ec_code_intel_modrm_mem(&intel, dst, base, index, scale, offset);
+ ec_code_intel_op_1(&intel, 0x8B, 0);
+ ec_code_intel_rex(&intel, _gf_true);
+
+ ec_code_intel_emit(builder, &intel);
+}
+
+void
+ec_code_intel_op_xor_r2r(ec_code_builder_t *builder, ec_code_intel_reg_t src,
+ ec_code_intel_reg_t dst)
+{
+ ec_code_intel_t intel;
+
+ ec_code_intel_init(&intel);
+
+ ec_code_intel_modrm_reg(&intel, dst, src);
+ ec_code_intel_op_1(&intel, 0x31, 0);
+ ec_code_intel_rex(&intel, _gf_true);
+
+ ec_code_intel_emit(builder, &intel);
+}
+
+void
+ec_code_intel_op_xor_m2r(ec_code_builder_t *builder, ec_code_intel_reg_t base,
+ ec_code_intel_reg_t index, uint32_t scale,
+ int32_t offset, ec_code_intel_reg_t dst)
+{
+ ec_code_intel_t intel;
+
+ ec_code_intel_init(&intel);
+
+ ec_code_intel_modrm_mem(&intel, dst, base, index, scale, offset);
+ ec_code_intel_op_1(&intel, 0x33, 0);
+ ec_code_intel_rex(&intel, _gf_true);
+
+ ec_code_intel_emit(builder, &intel);
+}
+
+void
+ec_code_intel_op_add_i2r(ec_code_builder_t *builder, int32_t value,
+ ec_code_intel_reg_t reg)
+{
+ ec_code_intel_t intel;
+
+ ec_code_intel_init(&intel);
+
+ if ((value >= -128) && (value < 128)) {
+ ec_code_intel_modrm_reg(&intel, reg, 0);
+ ec_code_intel_op_1(&intel, 0x83, 0);
+ ec_code_intel_immediate_1(&intel, value);
+ } else {
+ if (reg == REG_AX) {
+ ec_code_intel_op_1(&intel, 0x05, reg);
+ } else {
+ ec_code_intel_modrm_reg(&intel, reg, 0);
+ ec_code_intel_op_1(&intel, 0x81, 0);
+ }
+ ec_code_intel_immediate_4(&intel, value);
+ }
+ ec_code_intel_rex(&intel, _gf_true);
+
+ ec_code_intel_emit(builder, &intel);
+}
+
+void
+ec_code_intel_op_test_i2r(ec_code_builder_t *builder, uint32_t value,
+ ec_code_intel_reg_t reg)
+{
+ ec_code_intel_t intel;
+
+ ec_code_intel_init(&intel);
+
+ if (reg == REG_AX) {
+ ec_code_intel_op_1(&intel, 0xA9, reg);
+ } else {
+ ec_code_intel_modrm_reg(&intel, reg, 0);
+ ec_code_intel_op_1(&intel, 0xF7, 0);
+ }
+ ec_code_intel_immediate_4(&intel, value);
+ ec_code_intel_rex(&intel, _gf_true);
+
+ ec_code_intel_emit(builder, &intel);
+}
+
+void
+ec_code_intel_op_jne(ec_code_builder_t *builder, uint32_t address)
+{
+ ec_code_intel_t intel;
+ int32_t rel;
+
+ ec_code_intel_init(&intel);
+
+ rel = address - builder->address - 2;
+ if ((rel >= -128) && (rel < 128)) {
+ ec_code_intel_op_1(&intel, 0x75, 0);
+ ec_code_intel_immediate_1(&intel, rel);
+ } else {
+ rel -= 4;
+ ec_code_intel_op_2(&intel, 0x0F, 0x85, 0);
+ ec_code_intel_immediate_4(&intel, rel);
+ }
+ ec_code_intel_rex(&intel, _gf_false);
+
+ ec_code_intel_emit(builder, &intel);
+}
+
+void
+ec_code_intel_op_mov_sse2sse(ec_code_builder_t *builder, uint32_t src,
+ uint32_t dst)
+{
+ ec_code_intel_t intel;
+
+ ec_code_intel_init(&intel);
+
+ ec_code_intel_prefix(&intel, 0x66);
+ ec_code_intel_modrm_reg(&intel, src, dst);
+ ec_code_intel_op_2(&intel, 0x0F, 0x6F, 0);
+ ec_code_intel_rex(&intel, _gf_false);
+
+ ec_code_intel_emit(builder, &intel);
+}
+
+void
+ec_code_intel_op_mov_sse2m(ec_code_builder_t *builder, uint32_t src,
+ ec_code_intel_reg_t base, ec_code_intel_reg_t index,
+ uint32_t scale, int32_t offset)
+{
+ ec_code_intel_t intel;
+
+ ec_code_intel_init(&intel);
+
+ ec_code_intel_prefix(&intel, 0x66);
+ ec_code_intel_modrm_mem(&intel, src, base, index, scale, offset);
+ ec_code_intel_op_2(&intel, 0x0F, 0x7F, 0);
+ ec_code_intel_rex(&intel, _gf_false);
+
+ ec_code_intel_emit(builder, &intel);
+}
+
+void
+ec_code_intel_op_mov_m2sse(ec_code_builder_t *builder, ec_code_intel_reg_t base,
+ ec_code_intel_reg_t index, uint32_t scale,
+ int32_t offset, uint32_t dst)
+{
+ ec_code_intel_t intel;
+
+ ec_code_intel_init(&intel);
+
+ ec_code_intel_prefix(&intel, 0x66);
+ ec_code_intel_modrm_mem(&intel, dst, base, index, scale, offset);
+ ec_code_intel_op_2(&intel, 0x0F, 0x6F, 0);
+ ec_code_intel_rex(&intel, _gf_false);
+
+ ec_code_intel_emit(builder, &intel);
+}
+
+void
+ec_code_intel_op_xor_sse2sse(ec_code_builder_t *builder, uint32_t src,
+ uint32_t dst)
+{
+ ec_code_intel_t intel;
+
+ ec_code_intel_init(&intel);
+
+ ec_code_intel_prefix(&intel, 0x66);
+ ec_code_intel_modrm_reg(&intel, src, dst);
+ ec_code_intel_op_2(&intel, 0x0F, 0xEF, 0);
+ ec_code_intel_rex(&intel, _gf_false);
+
+ ec_code_intel_emit(builder, &intel);
+}
+
+void
+ec_code_intel_op_xor_m2sse(ec_code_builder_t *builder, ec_code_intel_reg_t base,
+ ec_code_intel_reg_t index, uint32_t scale,
+ int32_t offset, uint32_t dst)
+{
+ ec_code_intel_t intel;
+
+ ec_code_intel_init(&intel);
+
+ ec_code_intel_prefix(&intel, 0x66);
+ ec_code_intel_modrm_mem(&intel, dst, base, index, scale, offset);
+ ec_code_intel_op_2(&intel, 0x0F, 0xEF, 0);
+ ec_code_intel_rex(&intel, _gf_false);
+
+ ec_code_intel_emit(builder, &intel);
+}
+
+void
+ec_code_intel_op_mov_avx2avx(ec_code_builder_t *builder, uint32_t src,
+ uint32_t dst)
+{
+ ec_code_intel_t intel;
+
+ ec_code_intel_init(&intel);
+
+ ec_code_intel_modrm_reg(&intel, src, dst);
+ ec_code_intel_op_1(&intel, 0x6F, 0);
+ ec_code_intel_vex(&intel, _gf_false, _gf_true, VEX_OPCODE_0F, VEX_PREFIX_66,
+ VEX_REG_NONE);
+
+ ec_code_intel_emit(builder, &intel);
+}
+
+void
+ec_code_intel_op_mov_avx2m(ec_code_builder_t *builder, uint32_t src,
+ ec_code_intel_reg_t base, ec_code_intel_reg_t index,
+ uint32_t scale, int32_t offset)
+{
+ ec_code_intel_t intel;
+
+ ec_code_intel_init(&intel);
+
+ ec_code_intel_modrm_mem(&intel, src, base, index, scale, offset);
+ ec_code_intel_op_1(&intel, 0x7F, 0);
+ ec_code_intel_vex(&intel, _gf_false, _gf_true, VEX_OPCODE_0F, VEX_PREFIX_66,
+ VEX_REG_NONE);
+
+ ec_code_intel_emit(builder, &intel);
+}
+
+void
+ec_code_intel_op_mov_m2avx(ec_code_builder_t *builder, ec_code_intel_reg_t base,
+ ec_code_intel_reg_t index, uint32_t scale,
+ int32_t offset, uint32_t dst)
+{
+ ec_code_intel_t intel;
+
+ ec_code_intel_init(&intel);
+
+ ec_code_intel_modrm_mem(&intel, dst, base, index, scale, offset);
+ ec_code_intel_op_1(&intel, 0x6F, 0);
+ ec_code_intel_vex(&intel, _gf_false, _gf_true, VEX_OPCODE_0F, VEX_PREFIX_66,
+ VEX_REG_NONE);
+
+ ec_code_intel_emit(builder, &intel);
+}
+
+void
+ec_code_intel_op_xor_avx2avx(ec_code_builder_t *builder, uint32_t src,
+ uint32_t dst)
+{
+ ec_code_intel_t intel;
+
+ ec_code_intel_init(&intel);
+
+ ec_code_intel_modrm_reg(&intel, src, dst);
+ ec_code_intel_op_1(&intel, 0xEF, 0);
+ ec_code_intel_vex(&intel, _gf_false, _gf_true, VEX_OPCODE_0F, VEX_PREFIX_66,
+ dst);
+
+ ec_code_intel_emit(builder, &intel);
+}
+
+void
+ec_code_intel_op_xor_m2avx(ec_code_builder_t *builder, ec_code_intel_reg_t base,
+ ec_code_intel_reg_t index, uint32_t scale,
+ int32_t offset, uint32_t dst)
+{
+ ec_code_intel_t intel;
+
+ ec_code_intel_init(&intel);
+
+ ec_code_intel_modrm_mem(&intel, dst, base, index, scale, offset);
+ ec_code_intel_op_1(&intel, 0xEF, 0);
+ ec_code_intel_vex(&intel, _gf_false, _gf_true, VEX_OPCODE_0F, VEX_PREFIX_66,
+ dst);
+
+ ec_code_intel_emit(builder, &intel);
+}
diff --git a/xlators/cluster/ec/src/ec-code-intel.h b/xlators/cluster/ec/src/ec-code-intel.h
new file mode 100644
index 00000000000..3fa4a174765
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-code-intel.h
@@ -0,0 +1,191 @@
+/*
+ Copyright (c) 2015 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __EC_CODE_INTEL_H__
+#define __EC_CODE_INTEL_H__
+
+#include "ec-code.h"
+
+#define VEX_REG_NONE 0
+
+enum _ec_code_intel_reg;
+typedef enum _ec_code_intel_reg ec_code_intel_reg_t;
+
+enum _ec_code_vex_prefix;
+typedef enum _ec_code_vex_prefix ec_code_vex_prefix_t;
+
+enum _ec_code_vex_opcode;
+typedef enum _ec_code_vex_opcode ec_code_vex_opcode_t;
+
+struct _ec_code_intel_buffer;
+typedef struct _ec_code_intel_buffer ec_code_intel_buffer_t;
+
+struct _ec_code_intel_sib;
+typedef struct _ec_code_intel_sib ec_code_intel_sib_t;
+
+struct _ec_code_intel_modrm;
+typedef struct _ec_code_intel_modrm ec_code_intel_modrm_t;
+
+struct _ec_code_intel_rex;
+typedef struct _ec_code_intel_rex ec_code_intel_rex_t;
+
+struct _ec_code_intel;
+typedef struct _ec_code_intel ec_code_intel_t;
+
+enum _ec_code_intel_reg {
+ REG_NULL = -1,
+ REG_AX,
+ REG_CX,
+ REG_DX,
+ REG_BX,
+ REG_SP,
+ REG_BP,
+ REG_SI,
+ REG_DI,
+ REG_8,
+ REG_9,
+ REG_10,
+ REG_11,
+ REG_12,
+ REG_13,
+ REG_14,
+ REG_15
+};
+
+enum _ec_code_vex_prefix {
+ VEX_PREFIX_NONE = 0,
+ VEX_PREFIX_66,
+ VEX_PREFIX_F3,
+ VEX_PREFIX_F2
+};
+
+enum _ec_code_vex_opcode {
+ VEX_OPCODE_NONE = 0,
+ VEX_OPCODE_0F,
+ VEX_OPCODE_0F_38,
+ VEX_OPCODE_0F_3A
+};
+
+struct _ec_code_intel_buffer {
+ uint32_t bytes;
+ union {
+ uint8_t data[4];
+ uint32_t value;
+ };
+};
+
+struct _ec_code_intel_sib {
+ gf_boolean_t present;
+ uint32_t base;
+ uint32_t index;
+ uint32_t scale;
+};
+
+struct _ec_code_intel_modrm {
+ gf_boolean_t present;
+ uint32_t mod;
+ uint32_t rm;
+ uint32_t reg;
+};
+
+struct _ec_code_intel_rex {
+ gf_boolean_t present;
+ uint32_t w;
+ uint32_t r;
+ uint32_t x;
+ uint32_t b;
+};
+
+struct _ec_code_intel {
+ gf_boolean_t invalid;
+ ec_code_intel_buffer_t prefix;
+ ec_code_intel_buffer_t opcode;
+ ec_code_intel_buffer_t offset;
+ ec_code_intel_buffer_t immediate;
+ ec_code_intel_buffer_t vex;
+ ec_code_intel_rex_t rex;
+ ec_code_intel_modrm_t modrm;
+ ec_code_intel_sib_t sib;
+ uint32_t reg;
+};
+
+void
+ec_code_intel_op_push_r(ec_code_builder_t *builder, ec_code_intel_reg_t reg);
+void
+ec_code_intel_op_pop_r(ec_code_builder_t *builder, ec_code_intel_reg_t reg);
+void
+ec_code_intel_op_ret(ec_code_builder_t *builder, uint32_t size);
+
+void
+ec_code_intel_op_mov_r2r(ec_code_builder_t *builder, ec_code_intel_reg_t src,
+ ec_code_intel_reg_t dst);
+void
+ec_code_intel_op_mov_r2m(ec_code_builder_t *builder, ec_code_intel_reg_t src,
+ ec_code_intel_reg_t base, ec_code_intel_reg_t index,
+ uint32_t scale, int32_t offset);
+void
+ec_code_intel_op_mov_m2r(ec_code_builder_t *builder, ec_code_intel_reg_t base,
+ ec_code_intel_reg_t index, uint32_t scale,
+ int32_t offset, ec_code_intel_reg_t dst);
+void
+ec_code_intel_op_xor_r2r(ec_code_builder_t *builder, ec_code_intel_reg_t src,
+ ec_code_intel_reg_t dst);
+void
+ec_code_intel_op_xor_m2r(ec_code_builder_t *builder, ec_code_intel_reg_t base,
+ ec_code_intel_reg_t index, uint32_t scale,
+ int32_t offset, ec_code_intel_reg_t dst);
+void
+ec_code_intel_op_add_i2r(ec_code_builder_t *builder, int32_t value,
+ ec_code_intel_reg_t reg);
+void
+ec_code_intel_op_test_i2r(ec_code_builder_t *builder, uint32_t value,
+ ec_code_intel_reg_t reg);
+void
+ec_code_intel_op_jne(ec_code_builder_t *builder, uint32_t address);
+
+void
+ec_code_intel_op_mov_sse2sse(ec_code_builder_t *builder, uint32_t src,
+ uint32_t dst);
+void
+ec_code_intel_op_mov_sse2m(ec_code_builder_t *builder, uint32_t src,
+ ec_code_intel_reg_t base, ec_code_intel_reg_t index,
+ uint32_t scale, int32_t offset);
+void
+ec_code_intel_op_mov_m2sse(ec_code_builder_t *builder, ec_code_intel_reg_t base,
+ ec_code_intel_reg_t index, uint32_t scale,
+ int32_t offset, uint32_t dst);
+void
+ec_code_intel_op_xor_sse2sse(ec_code_builder_t *builder, uint32_t src,
+ uint32_t dst);
+void
+ec_code_intel_op_xor_m2sse(ec_code_builder_t *builder, ec_code_intel_reg_t base,
+ ec_code_intel_reg_t index, uint32_t scale,
+ int32_t offset, uint32_t dst);
+
+void
+ec_code_intel_op_mov_avx2avx(ec_code_builder_t *builder, uint32_t src,
+ uint32_t dst);
+void
+ec_code_intel_op_mov_avx2m(ec_code_builder_t *builder, uint32_t src,
+ ec_code_intel_reg_t base, ec_code_intel_reg_t index,
+ uint32_t scale, int32_t offset);
+void
+ec_code_intel_op_mov_m2avx(ec_code_builder_t *builder, ec_code_intel_reg_t base,
+ ec_code_intel_reg_t index, uint32_t scale,
+ int32_t offset, uint32_t dst);
+void
+ec_code_intel_op_xor_avx2avx(ec_code_builder_t *builder, uint32_t src,
+ uint32_t dst);
+void
+ec_code_intel_op_xor_m2avx(ec_code_builder_t *builder, ec_code_intel_reg_t base,
+ ec_code_intel_reg_t index, uint32_t scale,
+ int32_t offset, uint32_t dst);
+
+#endif /* __EC_CODE_INTEL_H__ */
diff --git a/xlators/cluster/ec/src/ec-code-sse.c b/xlators/cluster/ec/src/ec-code-sse.c
new file mode 100644
index 00000000000..e11e7ff8400
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-code-sse.c
@@ -0,0 +1,101 @@
+/*
+ Copyright (c) 2015 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <errno.h>
+
+#include "ec-code-intel.h"
+
+static void
+ec_code_sse_prolog(ec_code_builder_t *builder)
+{
+ builder->loop = builder->address;
+}
+
+static void
+ec_code_sse_epilog(ec_code_builder_t *builder)
+{
+ ec_code_intel_op_add_i2r(builder, 16, REG_DX);
+ ec_code_intel_op_add_i2r(builder, 16, REG_DI);
+ ec_code_intel_op_test_i2r(builder, builder->width - 1, REG_DX);
+ ec_code_intel_op_jne(builder, builder->loop);
+
+ ec_code_intel_op_ret(builder, 0);
+}
+
+static void
+ec_code_sse_load(ec_code_builder_t *builder, uint32_t dst, uint32_t idx,
+ uint32_t bit)
+{
+ if (builder->linear) {
+ ec_code_intel_op_mov_m2sse(
+ builder, REG_SI, REG_DX, 1,
+ idx * builder->width * builder->bits + bit * builder->width, dst);
+ } else {
+ if (builder->base != idx) {
+ ec_code_intel_op_mov_m2r(builder, REG_SI, REG_NULL, 0, idx * 8,
+ REG_AX);
+ builder->base = idx;
+ }
+ ec_code_intel_op_mov_m2sse(builder, REG_AX, REG_DX, 1,
+ bit * builder->width, dst);
+ }
+}
+
+static void
+ec_code_sse_store(ec_code_builder_t *builder, uint32_t src, uint32_t bit)
+{
+ ec_code_intel_op_mov_sse2m(builder, src, REG_DI, REG_NULL, 0,
+ bit * builder->width);
+}
+
+static void
+ec_code_sse_copy(ec_code_builder_t *builder, uint32_t dst, uint32_t src)
+{
+ ec_code_intel_op_mov_sse2sse(builder, src, dst);
+}
+
+static void
+ec_code_sse_xor2(ec_code_builder_t *builder, uint32_t dst, uint32_t src)
+{
+ ec_code_intel_op_xor_sse2sse(builder, src, dst);
+}
+
+static void
+ec_code_sse_xorm(ec_code_builder_t *builder, uint32_t dst, uint32_t idx,
+ uint32_t bit)
+{
+ if (builder->linear) {
+ ec_code_intel_op_xor_m2sse(
+ builder, REG_SI, REG_DX, 1,
+ idx * builder->width * builder->bits + bit * builder->width, dst);
+ } else {
+ if (builder->base != idx) {
+ ec_code_intel_op_mov_m2r(builder, REG_SI, REG_NULL, 0, idx * 8,
+ REG_AX);
+ builder->base = idx;
+ }
+ ec_code_intel_op_xor_m2sse(builder, REG_AX, REG_DX, 1,
+ bit * builder->width, dst);
+ }
+}
+
+static char *ec_code_sse_needed_flags[] = {"sse2", NULL};
+
+ec_code_gen_t ec_code_gen_sse = {.name = "sse",
+ .flags = ec_code_sse_needed_flags,
+ .width = 16,
+ .prolog = ec_code_sse_prolog,
+ .epilog = ec_code_sse_epilog,
+ .load = ec_code_sse_load,
+ .store = ec_code_sse_store,
+ .copy = ec_code_sse_copy,
+ .xor2 = ec_code_sse_xor2,
+ .xor3 = NULL,
+ .xorm = ec_code_sse_xorm};
diff --git a/xlators/features/changetimerecorder/src/ctr_mem_types.h b/xlators/cluster/ec/src/ec-code-sse.h
index 48387597814..f1acbcf894b 100644
--- a/xlators/features/changetimerecorder/src/ctr_mem_types.h
+++ b/xlators/cluster/ec/src/ec-code-sse.h
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2008-2015 Red Hat, Inc. <http://www.redhat.com>
+ Copyright (c) 2015 DataLab, s.l. <http://www.datalab.es>
This file is part of GlusterFS.
This file is licensed to you under your choice of the GNU Lesser
@@ -8,15 +8,11 @@
cases as published by the Free Software Foundation.
*/
+#ifndef __EC_CODE_SSE_H__
+#define __EC_CODE_SSE_H__
-#ifndef __CTR_MEM_TYPES_H__
-#define __CTR_MEM_TYPES_H__
+#include "ec-code.h"
-#include "gfdb_mem-types.h"
-
-enum gf_ctr_mem_types_ {
- gf_ctr_mt_private_t = gfdb_mt_end + 1,
- gf_ctr_mt_end
-};
-#endif
+extern ec_code_gen_t ec_code_gen_sse;
+#endif /* __EC_CODE_SSE_H__ */
diff --git a/xlators/cluster/ec/src/ec-code-x64.c b/xlators/cluster/ec/src/ec-code-x64.c
new file mode 100644
index 00000000000..26565b4493f
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-code-x64.c
@@ -0,0 +1,144 @@
+/*
+ Copyright (c) 2015 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <errno.h>
+
+#include "ec-code-intel.h"
+
+static ec_code_intel_reg_t ec_code_x64_regmap[] = {
+ REG_AX, REG_CX, REG_BP, REG_8, REG_9, REG_10,
+ REG_11, REG_12, REG_13, REG_14, REG_15};
+
+static void
+ec_code_x64_prolog(ec_code_builder_t *builder)
+{
+ uint32_t i;
+
+ ec_code_intel_op_push_r(builder, REG_BP);
+ if (!builder->linear) {
+ ec_code_intel_op_push_r(builder, REG_BX);
+ }
+ if (builder->regs > 11) {
+ ec_code_error(builder, EINVAL);
+ return;
+ }
+ for (i = 7; i < builder->regs; i++) {
+ ec_code_intel_op_push_r(builder, ec_code_x64_regmap[i]);
+ }
+
+ builder->loop = builder->address;
+}
+
+static void
+ec_code_x64_epilog(ec_code_builder_t *builder)
+{
+ uint32_t i;
+
+ ec_code_intel_op_add_i2r(builder, 8, REG_DX);
+ ec_code_intel_op_add_i2r(builder, 8, REG_DI);
+ ec_code_intel_op_test_i2r(builder, builder->width - 1, REG_DX);
+ ec_code_intel_op_jne(builder, builder->loop);
+
+ if (builder->regs > 11) {
+ ec_code_error(builder, EINVAL);
+ return;
+ }
+ for (i = builder->regs; i > 7; i--) {
+ ec_code_intel_op_pop_r(builder, ec_code_x64_regmap[i - 1]);
+ }
+ if (!builder->linear) {
+ ec_code_intel_op_pop_r(builder, REG_BX);
+ }
+ ec_code_intel_op_pop_r(builder, REG_BP);
+ ec_code_intel_op_ret(builder, 0);
+}
+
+static void
+ec_code_x64_load(ec_code_builder_t *builder, uint32_t dst, uint32_t idx,
+ uint32_t bit)
+{
+ dst = ec_code_x64_regmap[dst];
+
+ if (builder->linear) {
+ ec_code_intel_op_mov_m2r(
+ builder, REG_SI, REG_DX, 1,
+ idx * builder->width * builder->bits + bit * builder->width, dst);
+ } else {
+ if (builder->base != idx) {
+ ec_code_intel_op_mov_m2r(builder, REG_SI, REG_NULL, 0, idx * 8,
+ REG_BX);
+ builder->base = idx;
+ }
+ ec_code_intel_op_mov_m2r(builder, REG_BX, REG_DX, 1,
+ bit * builder->width, dst);
+ }
+}
+
+static void
+ec_code_x64_store(ec_code_builder_t *builder, uint32_t src, uint32_t bit)
+{
+ src = ec_code_x64_regmap[src];
+
+ ec_code_intel_op_mov_r2m(builder, src, REG_DI, REG_NULL, 0,
+ bit * builder->width);
+}
+
+static void
+ec_code_x64_copy(ec_code_builder_t *builder, uint32_t dst, uint32_t src)
+{
+ dst = ec_code_x64_regmap[dst];
+ src = ec_code_x64_regmap[src];
+
+ ec_code_intel_op_mov_r2r(builder, src, dst);
+}
+
+static void
+ec_code_x64_xor2(ec_code_builder_t *builder, uint32_t dst, uint32_t src)
+{
+ dst = ec_code_x64_regmap[dst];
+ src = ec_code_x64_regmap[src];
+
+ ec_code_intel_op_xor_r2r(builder, src, dst);
+}
+
+static void
+ec_code_x64_xorm(ec_code_builder_t *builder, uint32_t dst, uint32_t idx,
+ uint32_t bit)
+{
+ dst = ec_code_x64_regmap[dst];
+
+ if (builder->linear) {
+ ec_code_intel_op_xor_m2r(
+ builder, REG_SI, REG_DX, 1,
+ idx * builder->width * builder->bits + bit * builder->width, dst);
+ } else {
+ if (builder->base != idx) {
+ ec_code_intel_op_mov_m2r(builder, REG_SI, REG_NULL, 0, idx * 8,
+ REG_BX);
+ builder->base = idx;
+ }
+ ec_code_intel_op_xor_m2r(builder, REG_BX, REG_DX, 1,
+ bit * builder->width, dst);
+ }
+}
+
+static char *ec_code_x64_needed_flags[] = {NULL};
+
+ec_code_gen_t ec_code_gen_x64 = {.name = "x64",
+ .flags = ec_code_x64_needed_flags,
+ .width = sizeof(uint64_t),
+ .prolog = ec_code_x64_prolog,
+ .epilog = ec_code_x64_epilog,
+ .load = ec_code_x64_load,
+ .store = ec_code_x64_store,
+ .copy = ec_code_x64_copy,
+ .xor2 = ec_code_x64_xor2,
+ .xor3 = NULL,
+ .xorm = ec_code_x64_xorm};
diff --git a/xlators/cluster/ec/src/ec-code-x64.h b/xlators/cluster/ec/src/ec-code-x64.h
new file mode 100644
index 00000000000..bd8174e4bf5
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-code-x64.h
@@ -0,0 +1,18 @@
+/*
+ Copyright (c) 2015 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __EC_CODE_X64_H__
+#define __EC_CODE_X64_H__
+
+#include "ec-code.h"
+
+extern ec_code_gen_t ec_code_gen_x64;
+
+#endif /* __EC_CODE_X64_H__ */
diff --git a/xlators/cluster/ec/src/ec-code.c b/xlators/cluster/ec/src/ec-code.c
new file mode 100644
index 00000000000..03162ae05a9
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-code.c
@@ -0,0 +1,1060 @@
+/*
+ Copyright (c) 2015 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <ctype.h>
+
+#include <glusterfs/syscall.h>
+
+#include "ec-mem-types.h"
+#include "ec-code.h"
+#include "ec-messages.h"
+#include "ec-code-c.h"
+#include "ec-helpers.h"
+
+#ifdef USE_EC_DYNAMIC_X64
+#include "ec-code-x64.h"
+#endif
+
+#ifdef USE_EC_DYNAMIC_SSE
+#include "ec-code-sse.h"
+#endif
+
+#ifdef USE_EC_DYNAMIC_AVX
+#include "ec-code-avx.h"
+#endif
+
+#define EC_CODE_SIZE (1024 * 64)
+#define EC_CODE_ALIGN 4096
+
+#define EC_CODE_CHUNK_MIN_SIZE 512
+
+#define EC_PROC_BUFFER_SIZE 4096
+
+#define PROC_CPUINFO "/proc/cpuinfo"
+
+struct _ec_code_proc;
+typedef struct _ec_code_proc ec_code_proc_t;
+
+struct _ec_code_proc {
+ int32_t fd;
+ gf_boolean_t eof;
+ gf_boolean_t error;
+ gf_boolean_t skip;
+ ssize_t size;
+ ssize_t pos;
+ char buffer[EC_PROC_BUFFER_SIZE];
+};
+
+static ec_code_gen_t *ec_code_gen_table[] = {
+#ifdef USE_EC_DYNAMIC_AVX
+ &ec_code_gen_avx,
+#endif
+#ifdef USE_EC_DYNAMIC_SSE
+ &ec_code_gen_sse,
+#endif
+#ifdef USE_EC_DYNAMIC_X64
+ &ec_code_gen_x64,
+#endif
+ NULL};
+
+static void
+ec_code_arg_set(ec_code_arg_t *arg, uint32_t value)
+{
+ arg->value = value;
+}
+
+static void
+ec_code_arg_assign(ec_code_builder_t *builder, ec_code_op_t *op,
+ ec_code_arg_t *arg, uint32_t reg)
+{
+ arg->value = reg;
+
+ if (builder->regs <= reg) {
+ builder->regs = reg + 1;
+ }
+}
+
+static void
+ec_code_arg_use(ec_code_builder_t *builder, ec_code_op_t *op,
+ ec_code_arg_t *arg, uint32_t reg)
+{
+ arg->value = reg;
+}
+
+static void
+ec_code_arg_update(ec_code_builder_t *builder, ec_code_op_t *op,
+ ec_code_arg_t *arg, uint32_t reg)
+{
+ arg->value = reg;
+}
+
+static ec_code_op_t *
+ec_code_op_next(ec_code_builder_t *builder)
+{
+ ec_code_op_t *op;
+
+ op = &builder->ops[builder->count++];
+ memset(op, 0, sizeof(ec_code_op_t));
+
+ return op;
+}
+
+static void
+ec_code_load(ec_code_builder_t *builder, uint32_t bit, uint32_t offset)
+{
+ ec_code_op_t *op;
+
+ op = ec_code_op_next(builder);
+
+ op->op = EC_GF_OP_LOAD;
+ ec_code_arg_assign(builder, op, &op->arg1, builder->map[bit]);
+ ec_code_arg_set(&op->arg2, offset);
+ ec_code_arg_set(&op->arg3, bit);
+}
+
+static void
+ec_code_store(ec_code_builder_t *builder, uint32_t reg, uint32_t bit)
+{
+ ec_code_op_t *op;
+
+ op = ec_code_op_next(builder);
+
+ op->op = EC_GF_OP_STORE;
+ ec_code_arg_use(builder, op, &op->arg1, builder->map[reg]);
+ ec_code_arg_set(&op->arg2, 0);
+ ec_code_arg_set(&op->arg3, bit);
+}
+
+static void
+ec_code_copy(ec_code_builder_t *builder, uint32_t dst, uint32_t src)
+{
+ ec_code_op_t *op;
+
+ op = ec_code_op_next(builder);
+
+ op->op = EC_GF_OP_COPY;
+ ec_code_arg_assign(builder, op, &op->arg1, builder->map[dst]);
+ ec_code_arg_use(builder, op, &op->arg2, builder->map[src]);
+ ec_code_arg_set(&op->arg3, 0);
+}
+
+static void
+ec_code_xor2(ec_code_builder_t *builder, uint32_t dst, uint32_t src)
+{
+ ec_code_op_t *op;
+
+ op = ec_code_op_next(builder);
+
+ op->op = EC_GF_OP_XOR2;
+ ec_code_arg_update(builder, op, &op->arg1, builder->map[dst]);
+ ec_code_arg_use(builder, op, &op->arg2, builder->map[src]);
+ ec_code_arg_set(&op->arg3, 0);
+}
+
+static void
+ec_code_xor3(ec_code_builder_t *builder, uint32_t dst, uint32_t src1,
+ uint32_t src2)
+{
+ ec_code_op_t *op;
+
+ if (builder->code->gen->xor3 == NULL) {
+ ec_code_copy(builder, dst, src1);
+ ec_code_xor2(builder, dst, src2);
+
+ return;
+ }
+
+ op = ec_code_op_next(builder);
+
+ op->op = EC_GF_OP_XOR3;
+ ec_code_arg_assign(builder, op, &op->arg1, builder->map[dst]);
+ ec_code_arg_use(builder, op, &op->arg2, builder->map[src1]);
+ ec_code_arg_use(builder, op, &op->arg3, builder->map[src2]);
+}
+
+static void
+ec_code_xorm(ec_code_builder_t *builder, uint32_t bit, uint32_t offset)
+{
+ ec_code_op_t *op;
+
+ op = ec_code_op_next(builder);
+
+ op->op = EC_GF_OP_XORM;
+ ec_code_arg_update(builder, op, &op->arg1, builder->map[bit]);
+ ec_code_arg_set(&op->arg2, offset);
+ ec_code_arg_set(&op->arg3, bit);
+}
+
+static void
+ec_code_dup(ec_code_builder_t *builder, ec_gf_op_t *op)
+{
+ switch (op->op) {
+ case EC_GF_OP_COPY:
+ ec_code_copy(builder, op->arg1, op->arg2);
+ break;
+ case EC_GF_OP_XOR2:
+ ec_code_xor2(builder, op->arg1, op->arg2);
+ break;
+ case EC_GF_OP_XOR3:
+ ec_code_xor3(builder, op->arg1, op->arg2, op->arg3);
+ break;
+ default:
+ break;
+ }
+}
+
+static void
+ec_code_gf_load(ec_code_builder_t *builder, uint32_t offset)
+{
+ uint32_t i;
+
+ for (i = 0; i < builder->code->gf->bits; i++) {
+ ec_code_load(builder, i, offset);
+ }
+}
+
+static void
+ec_code_gf_load_xor(ec_code_builder_t *builder, uint32_t offset)
+{
+ uint32_t i;
+
+ for (i = 0; i < builder->code->gf->bits; i++) {
+ ec_code_xorm(builder, i, offset);
+ }
+}
+
+static void
+ec_code_gf_store(ec_code_builder_t *builder)
+{
+ uint32_t i;
+
+ for (i = 0; i < builder->code->gf->bits; i++) {
+ ec_code_store(builder, i, i);
+ }
+}
+
+static void
+ec_code_gf_clear(ec_code_builder_t *builder)
+{
+ uint32_t i;
+
+ ec_code_xor2(builder, 0, 0);
+ for (i = 0; i < builder->code->gf->bits; i++) {
+ ec_code_store(builder, 0, i);
+ }
+}
+
+static void
+ec_code_gf_mul(ec_code_builder_t *builder, uint32_t value)
+{
+ ec_gf_mul_t *mul;
+ ec_gf_op_t *op;
+ uint32_t map[EC_GF_MAX_REGS];
+ int32_t i;
+
+ mul = builder->code->gf->table[value];
+ for (op = mul->ops; op->op != EC_GF_OP_END; op++) {
+ ec_code_dup(builder, op);
+ }
+
+ for (i = 0; i < mul->regs; i++) {
+ map[i] = builder->map[mul->map[i]];
+ }
+ memcpy(builder->map, map, sizeof(uint32_t) * mul->regs);
+}
+
+static ec_code_builder_t *
+ec_code_prepare(ec_code_t *code, uint32_t count, uint32_t width,
+ gf_boolean_t linear)
+{
+ ec_code_builder_t *builder;
+ uint32_t i;
+
+ count *= code->gf->bits + code->gf->max_ops;
+ count += code->gf->bits;
+ builder = GF_MALLOC(
+ sizeof(ec_code_builder_t) + sizeof(ec_code_op_t) * count,
+ ec_mt_ec_code_builder_t);
+ if (builder == NULL) {
+ return EC_ERR(ENOMEM);
+ }
+
+ builder->address = 0;
+ builder->code = code;
+ builder->size = 0;
+ builder->count = 0;
+ builder->regs = 0;
+ builder->error = 0;
+ builder->bits = code->gf->bits;
+ builder->width = width;
+ builder->data = NULL;
+ builder->linear = linear;
+ builder->base = -1;
+
+ for (i = 0; i < EC_GF_MAX_REGS; i++) {
+ builder->map[i] = i;
+ }
+
+ return builder;
+}
+
+static size_t
+ec_code_space_size(void)
+{
+ return (sizeof(ec_code_space_t) + 15) & ~15;
+}
+
+static size_t
+ec_code_chunk_size(void)
+{
+ return (sizeof(ec_code_chunk_t) + 15) & ~15;
+}
+
+static ec_code_chunk_t *
+ec_code_chunk_from_space(ec_code_space_t *space)
+{
+ return (ec_code_chunk_t *)((uintptr_t)space + ec_code_space_size());
+}
+
+static void *
+ec_code_to_executable(ec_code_space_t *space, void *addr)
+{
+ return (void *)((uintptr_t)addr - (uintptr_t)space +
+ (uintptr_t)space->exec);
+}
+
+static void *
+ec_code_from_executable(ec_code_space_t *space, void *addr)
+{
+ return (void *)((uintptr_t)addr - (uintptr_t)space->exec +
+ (uintptr_t)space);
+}
+
+static void *
+ec_code_func_from_chunk(ec_code_chunk_t *chunk, void **exec)
+{
+ void *addr;
+
+ addr = (void *)((uintptr_t)chunk + ec_code_chunk_size());
+
+ *exec = ec_code_to_executable(chunk->space, addr);
+
+ return addr;
+}
+
+static ec_code_chunk_t *
+ec_code_chunk_from_func(ec_code_func_linear_t func)
+{
+ ec_code_chunk_t *chunk;
+
+ chunk = (ec_code_chunk_t *)((uintptr_t)func - ec_code_chunk_size());
+
+ return ec_code_from_executable(chunk->space, chunk);
+}
+
+static ec_code_chunk_t *
+ec_code_chunk_split(ec_code_chunk_t *chunk, size_t size)
+{
+ ec_code_chunk_t *extra;
+ ssize_t avail;
+
+ avail = chunk->size - size - ec_code_chunk_size();
+ if (avail > 0) {
+ extra = (ec_code_chunk_t *)((uintptr_t)chunk + chunk->size - avail);
+ extra->space = chunk->space;
+ extra->size = avail;
+ list_add(&extra->list, &chunk->list);
+ chunk->size = size;
+ }
+ list_del_init(&chunk->list);
+
+ return chunk;
+}
+
+static gf_boolean_t
+ec_code_chunk_touch(ec_code_chunk_t *prev, ec_code_chunk_t *next)
+{
+ uintptr_t end;
+
+ end = (uintptr_t)prev + ec_code_chunk_size() + prev->size;
+ return (end == (uintptr_t)next);
+}
+
+static ec_code_space_t *
+ec_code_space_create(ec_code_t *code, size_t size)
+{
+ char path[] = GLUSTERFS_LIBEXECDIR "/ec-code-dynamic.XXXXXX";
+ ec_code_space_t *space;
+ void *exec;
+ int32_t fd, err;
+
+ /* We need to create memory areas to store the generated dynamic code.
+ * Obviously these areas need to be written to be able to create the
+ * code and they also need to be executable to execute it.
+ *
+ * However it's a bad practice to have a memory region that is both
+ * writable *and* executable. In fact, selinux forbids this and causes
+ * attempts to do so to fail (unless specifically configured).
+ *
+ * To solve the problem we'll use two distinct memory areas mapped to
+ * the same physical storage. One of the memory areas will have write
+ * permission, and the other will have execute permission. Both areas
+ * will have the same contents. The physical storage will be a regular
+ * file that will be mmapped to both areas.
+ */
+
+ /* We need to create a temporary file as the backend storage for the
+ * memory mapped areas. */
+ /* coverity[secure_temp] mkstemp uses 0600 as the mode and is safe */
+ fd = mkstemp(path);
+ if (fd < 0) {
+ err = errno;
+ gf_msg(THIS->name, GF_LOG_ERROR, err, EC_MSG_DYN_CREATE_FAILED,
+ "Unable to create a temporary file for the ec dynamic "
+ "code");
+ space = EC_ERR(err);
+ goto done;
+ }
+ /* Once created we don't need to keep it in the file system. It will
+ * still exist until we close the last file descriptor or unmap the
+ * memory areas bound to the file. */
+ sys_unlink(path);
+
+ size = (size + EC_CODE_ALIGN - 1) & ~(EC_CODE_ALIGN - 1);
+ if (sys_ftruncate(fd, size) < 0) {
+ err = errno;
+ gf_msg(THIS->name, GF_LOG_ERROR, err, EC_MSG_DYN_CREATE_FAILED,
+ "Unable to resize the file for the ec dynamic code");
+ space = EC_ERR(err);
+ goto done_close;
+ }
+
+ /* This creates an executable memory area to be able to run the
+ * generated fragments of code. */
+ exec = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0);
+ if (exec == MAP_FAILED) {
+ err = errno;
+ gf_msg(THIS->name, GF_LOG_ERROR, err, EC_MSG_DYN_CREATE_FAILED,
+ "Unable to map the executable area for the ec dynamic "
+ "code");
+ space = EC_ERR(err);
+ goto done_close;
+ }
+ /* It's not important to check the return value of mlock(). If it fails
+ * everything will continue to work normally. */
+ mlock(exec, size);
+
+ /* This maps a read/write memory area to be able to create the dynamici
+ * code. */
+ space = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ if (space == MAP_FAILED) {
+ err = errno;
+ gf_msg(THIS->name, GF_LOG_ERROR, err, EC_MSG_DYN_CREATE_FAILED,
+ "Unable to map the writable area for the ec dynamic "
+ "code");
+ space = EC_ERR(err);
+
+ munmap(exec, size);
+
+ goto done_close;
+ }
+
+ space->exec = exec;
+ space->size = size;
+ space->code = code;
+ list_add_tail(&space->list, &code->spaces);
+ INIT_LIST_HEAD(&space->chunks);
+
+done_close:
+ /* If everything has succeeded, we already have the memory areas
+ * mapped. We don't need the file descriptor anymore because the
+ * backend storage will be there until the mmap()'d regions are
+ * unmapped. */
+ sys_close(fd);
+done:
+ return space;
+}
+
+static void
+ec_code_space_destroy(ec_code_space_t *space)
+{
+ list_del_init(&space->list);
+
+ munmap(space->exec, space->size);
+ munmap(space, space->size);
+}
+
+static void
+ec_code_chunk_merge(ec_code_chunk_t *chunk)
+{
+ ec_code_chunk_t *item, *tmp;
+
+ list_for_each_entry_safe(item, tmp, &chunk->space->chunks, list)
+ {
+ if ((uintptr_t)item > (uintptr_t)chunk) {
+ list_add_tail(&chunk->list, &item->list);
+ if (ec_code_chunk_touch(chunk, item)) {
+ chunk->size += item->size + ec_code_chunk_size();
+ list_del_init(&item->list);
+ }
+
+ goto check;
+ }
+ if (ec_code_chunk_touch(item, chunk)) {
+ item->size += chunk->size + ec_code_chunk_size();
+ list_del_init(&item->list);
+ chunk = item;
+ }
+ }
+ list_add_tail(&chunk->list, &chunk->space->chunks);
+
+check:
+ if (chunk->size ==
+ chunk->space->size - ec_code_space_size() - ec_code_chunk_size()) {
+ ec_code_space_destroy(chunk->space);
+ }
+}
+
+static ec_code_chunk_t *
+ec_code_space_alloc(ec_code_t *code, size_t size)
+{
+ ec_code_space_t *space;
+ ec_code_chunk_t *chunk;
+ size_t map_size;
+
+ /* To minimize fragmentation, we only allocate chunks of sizes multiples
+ * of EC_CODE_CHUNK_MIN_SIZE. */
+ size = ((size + ec_code_chunk_size() + EC_CODE_CHUNK_MIN_SIZE - 1) &
+ ~(EC_CODE_CHUNK_MIN_SIZE - 1)) -
+ ec_code_chunk_size();
+ list_for_each_entry(space, &code->spaces, list)
+ {
+ list_for_each_entry(chunk, &space->chunks, list)
+ {
+ if (chunk->size >= size) {
+ goto out;
+ }
+ }
+ }
+
+ map_size = EC_CODE_SIZE - ec_code_space_size() - ec_code_chunk_size();
+ if (map_size < size) {
+ map_size = size;
+ }
+ space = ec_code_space_create(code, map_size);
+ if (EC_IS_ERR(space)) {
+ return (ec_code_chunk_t *)space;
+ }
+
+ chunk = ec_code_chunk_from_space(space);
+ chunk->size = map_size - ec_code_space_size() - ec_code_chunk_size();
+ list_add(&chunk->list, &space->chunks);
+
+out:
+ chunk->space = space;
+
+ return ec_code_chunk_split(chunk, size);
+}
+
+static ec_code_chunk_t *
+ec_code_alloc(ec_code_t *code, uint32_t size)
+{
+ ec_code_chunk_t *chunk;
+
+ LOCK(&code->lock);
+
+ chunk = ec_code_space_alloc(code, size);
+
+ UNLOCK(&code->lock);
+
+ return chunk;
+}
+
+static void
+ec_code_free(ec_code_chunk_t *chunk)
+{
+ gf_lock_t *lock;
+
+ lock = &chunk->space->code->lock;
+ LOCK(lock);
+
+ ec_code_chunk_merge(chunk);
+
+ UNLOCK(lock);
+}
+
+static int32_t
+ec_code_write(ec_code_builder_t *builder)
+{
+ ec_code_gen_t *gen;
+ ec_code_op_t *op;
+ uint32_t i;
+
+ builder->error = 0;
+ builder->size = 0;
+ builder->address = 0;
+ builder->base = -1;
+
+ gen = builder->code->gen;
+ gen->prolog(builder);
+ for (i = 0; i < builder->count; i++) {
+ op = &builder->ops[i];
+ switch (op->op) {
+ case EC_GF_OP_LOAD:
+ gen->load(builder, op->arg1.value, op->arg2.value,
+ op->arg3.value);
+ break;
+ case EC_GF_OP_STORE:
+ gen->store(builder, op->arg1.value, op->arg3.value);
+ break;
+ case EC_GF_OP_COPY:
+ gen->copy(builder, op->arg1.value, op->arg2.value);
+ break;
+ case EC_GF_OP_XOR2:
+ gen->xor2(builder, op->arg1.value, op->arg2.value);
+ break;
+ case EC_GF_OP_XOR3:
+ gen->xor3(builder, op->arg1.value, op->arg2.value,
+ op->arg3.value);
+ break;
+ case EC_GF_OP_XORM:
+ gen->xorm(builder, op->arg1.value, op->arg2.value,
+ op->arg3.value);
+ break;
+ default:
+ break;
+ }
+ }
+ gen->epilog(builder);
+
+ return builder->error;
+}
+
+static void *
+ec_code_compile(ec_code_builder_t *builder)
+{
+ ec_code_chunk_t *chunk;
+ void *func;
+ int32_t err;
+
+ err = ec_code_write(builder);
+ if (err != 0) {
+ return EC_ERR(err);
+ }
+
+ chunk = ec_code_alloc(builder->code, builder->size);
+ if (EC_IS_ERR(chunk)) {
+ return chunk;
+ }
+ builder->data = ec_code_func_from_chunk(chunk, &func);
+
+ err = ec_code_write(builder);
+ if (err != 0) {
+ ec_code_free(chunk);
+
+ return EC_ERR(err);
+ }
+
+ GF_FREE(builder);
+
+ return func;
+}
+
+ec_code_t *
+ec_code_create(ec_gf_t *gf, ec_code_gen_t *gen)
+{
+ ec_code_t *code;
+
+ code = GF_MALLOC(sizeof(ec_code_t), ec_mt_ec_code_t);
+ if (code == NULL) {
+ return EC_ERR(ENOMEM);
+ }
+ memset(code, 0, sizeof(ec_code_t));
+ INIT_LIST_HEAD(&code->spaces);
+ LOCK_INIT(&code->lock);
+
+ code->gf = gf;
+ code->gen = gen;
+
+ return code;
+}
+
+void
+ec_code_destroy(ec_code_t *code)
+{
+ if (!list_empty(&code->spaces)) {
+ }
+
+ LOCK_DESTROY(&code->lock);
+
+ GF_FREE(code);
+}
+
+static uint32_t
+ec_code_value_next(uint32_t *values, uint32_t count, uint32_t *offset)
+{
+ uint32_t i, next;
+
+ next = 0;
+ for (i = *offset + 1; i < count; i++) {
+ next = values[i];
+ if (next != 0) {
+ break;
+ }
+ }
+ *offset = i;
+
+ return next;
+}
+
+static void *
+ec_code_build_dynamic(ec_code_t *code, uint32_t width, uint32_t *values,
+ uint32_t count, gf_boolean_t linear)
+{
+ ec_code_builder_t *builder;
+ uint32_t offset, val, next;
+
+ builder = ec_code_prepare(code, count, width, linear);
+ if (EC_IS_ERR(builder)) {
+ return builder;
+ }
+
+ offset = -1;
+ next = ec_code_value_next(values, count, &offset);
+ if (next != 0) {
+ ec_code_gf_load(builder, offset);
+ do {
+ val = next;
+ next = ec_code_value_next(values, count, &offset);
+ if (next != 0) {
+ ec_code_gf_mul(builder, ec_gf_div(code->gf, val, next));
+ ec_code_gf_load_xor(builder, offset);
+ }
+ } while (next != 0);
+ ec_code_gf_mul(builder, val);
+ ec_code_gf_store(builder);
+ } else {
+ ec_code_gf_clear(builder);
+ }
+
+ return ec_code_compile(builder);
+}
+
+static void *
+ec_code_build(ec_code_t *code, uint32_t width, uint32_t *values, uint32_t count,
+ gf_boolean_t linear)
+{
+ void *func;
+
+ if (code->gen != NULL) {
+ func = ec_code_build_dynamic(code, width, values, count, linear);
+ if (!EC_IS_ERR(func)) {
+ return func;
+ }
+
+ gf_msg_debug(THIS->name, GF_LOG_DEBUG,
+ "Unable to generate dynamic code. Falling back "
+ "to precompiled code");
+
+ /* The dynamic code generation shouldn't fail in normal
+ * conditions, but if it fails at some point, it's very
+ * probable that it will fail again, so we completely disable
+ * dynamic code generation. */
+ code->gen = NULL;
+ }
+
+ ec_code_c_prepare(code->gf, values, count);
+
+ if (linear) {
+ return ec_code_c_linear;
+ }
+
+ return ec_code_c_interleaved;
+}
+
+ec_code_func_linear_t
+ec_code_build_linear(ec_code_t *code, uint32_t width, uint32_t *values,
+ uint32_t count)
+{
+ return (ec_code_func_linear_t)ec_code_build(code, width, values, count,
+ _gf_true);
+}
+
+ec_code_func_interleaved_t
+ec_code_build_interleaved(ec_code_t *code, uint32_t width, uint32_t *values,
+ uint32_t count)
+{
+ return (ec_code_func_interleaved_t)ec_code_build(code, width, values, count,
+ _gf_false);
+}
+
+void
+ec_code_release(ec_code_t *code, ec_code_func_t *func)
+{
+ if ((func->linear != ec_code_c_linear) &&
+ (func->interleaved != ec_code_c_interleaved)) {
+ ec_code_free(ec_code_chunk_from_func(func->linear));
+ }
+}
+
+void
+ec_code_error(ec_code_builder_t *builder, int32_t error)
+{
+ if (builder->error == 0) {
+ gf_msg(THIS->name, GF_LOG_ERROR, error, EC_MSG_DYN_CODEGEN_FAILED,
+ "Failed to generate dynamic code");
+ builder->error = error;
+ }
+}
+
+void
+ec_code_emit(ec_code_builder_t *builder, uint8_t *bytes, uint32_t count)
+{
+ if (builder->error != 0) {
+ return;
+ }
+
+ if (builder->data != NULL) {
+ memcpy(builder->data + builder->size, bytes, count);
+ }
+
+ builder->size += count;
+ builder->address += count;
+}
+
+static char *
+ec_code_proc_trim_left(char *text, ssize_t *length)
+{
+ ssize_t len;
+
+ for (len = *length; (len > 0) && isspace(*text); len--) {
+ text++;
+ }
+ *length = len;
+
+ return text;
+}
+
+static char *
+ec_code_proc_trim_right(char *text, ssize_t *length, char sep)
+{
+ char *last;
+ ssize_t len;
+
+ len = *length;
+
+ last = text;
+ for (len = *length; (len > 0) && (*text != sep); len--) {
+ if (!isspace(*text)) {
+ last = text + 1;
+ }
+ text++;
+ }
+ *last = 0;
+ *length = len;
+
+ return text;
+}
+
+static char *
+ec_code_proc_line_parse(ec_code_proc_t *file, ssize_t *length)
+{
+ char *text, *end;
+ ssize_t len;
+
+ len = file->size - file->pos;
+ text = ec_code_proc_trim_left(file->buffer + file->pos, &len);
+ end = ec_code_proc_trim_right(text, &len, '\n');
+ if (len == 0) {
+ if (!file->eof) {
+ if (text == file->buffer) {
+ file->size = file->pos = 0;
+ file->skip = _gf_true;
+ } else {
+ file->size = file->pos = end - text;
+ memmove(file->buffer, text, file->pos + 1);
+ }
+ len = sys_read(file->fd, file->buffer + file->pos,
+ sizeof(file->buffer) - file->pos - 1);
+ if (len > 0) {
+ file->size += len;
+ }
+ file->error = len < 0;
+ file->eof = len <= 0;
+
+ return NULL;
+ }
+ file->size = file->pos = 0;
+ } else {
+ file->pos = end - file->buffer + 1;
+ }
+
+ *length = end - text;
+
+ if (file->skip) {
+ file->skip = _gf_false;
+ text = NULL;
+ }
+
+ return text;
+}
+
+static char *
+ec_code_proc_line(ec_code_proc_t *file, ssize_t *length)
+{
+ char *text;
+
+ text = NULL;
+ while (!file->eof) {
+ text = ec_code_proc_line_parse(file, length);
+ if (text != NULL) {
+ break;
+ }
+ }
+
+ return text;
+}
+
+static char *
+ec_code_proc_split(char *text, ssize_t *length, char sep)
+{
+ text = ec_code_proc_trim_right(text, length, sep);
+ if (*length == 0) {
+ return NULL;
+ }
+ (*length)--;
+ text++;
+
+ return ec_code_proc_trim_left(text, length);
+}
+
+static uint32_t
+ec_code_cpu_check(uint32_t idx, char *list, uint32_t count)
+{
+ ec_code_gen_t *gen;
+ char **ptr;
+ char *table[count + 1];
+ uint32_t i;
+
+ for (i = 0; i < count; i++) {
+ table[i] = list;
+ list += strlen(list) + 1;
+ }
+
+ gen = ec_code_gen_table[idx];
+ while (gen != NULL) {
+ for (ptr = gen->flags; *ptr != NULL; ptr++) {
+ for (i = 0; i < count; i++) {
+ if (strcmp(*ptr, table[i]) == 0) {
+ break;
+ }
+ }
+ if (i >= count) {
+ gen = ec_code_gen_table[++idx];
+ break;
+ }
+ }
+ if (*ptr == NULL) {
+ break;
+ }
+ }
+
+ return idx;
+}
+
+ec_code_gen_t *
+ec_code_detect(xlator_t *xl, const char *def)
+{
+ ec_code_proc_t file;
+ ec_code_gen_t *gen = NULL;
+ char *line, *data, *list;
+ ssize_t length;
+ uint32_t count, base, select;
+
+ if (strcmp(def, "none") == 0) {
+ gf_msg(xl->name, GF_LOG_INFO, 0, EC_MSG_EXTENSION_NONE,
+ "Not using any cpu extensions");
+
+ return NULL;
+ }
+
+ file.fd = sys_open(PROC_CPUINFO, O_RDONLY, 0);
+ if (file.fd < 0) {
+ goto out;
+ }
+ file.size = file.pos = 0;
+ file.eof = file.error = file.skip = _gf_false;
+
+ select = 0;
+ if (strcmp(def, "auto") != 0) {
+ while (ec_code_gen_table[select] != NULL) {
+ if (strcmp(ec_code_gen_table[select]->name, def) == 0) {
+ break;
+ }
+ select++;
+ }
+ if (ec_code_gen_table[select] == NULL) {
+ gf_msg(xl->name, GF_LOG_WARNING, EINVAL, EC_MSG_EXTENSION_UNKNOWN,
+ "CPU extension '%s' is not known. Not using any cpu "
+ "extensions",
+ def);
+
+ return NULL;
+ }
+ } else {
+ def = NULL;
+ }
+
+ while ((line = ec_code_proc_line(&file, &length)) != NULL) {
+ data = ec_code_proc_split(line, &length, ':');
+ if ((data != NULL) && (strcmp(line, "flags") == 0)) {
+ list = data;
+ count = 0;
+ while ((data != NULL) && (*data != 0)) {
+ count++;
+ data = ec_code_proc_split(data, &length, ' ');
+ }
+ base = select;
+ select = ec_code_cpu_check(select, list, count);
+ if ((base != select) && (def != NULL)) {
+ gf_msg(xl->name, GF_LOG_WARNING, ENOTSUP,
+ EC_MSG_EXTENSION_UNSUPPORTED,
+ "CPU extension '%s' is not supported", def);
+ def = NULL;
+ }
+ }
+ }
+
+ if (file.error) {
+ gf_msg(xl->name, GF_LOG_WARNING, 0, EC_MSG_EXTENSION_FAILED,
+ "Unable to determine supported CPU extensions. Not using any "
+ "cpu extensions");
+
+ gen = NULL;
+ } else {
+ gen = ec_code_gen_table[select];
+ if (gen == NULL) {
+ gf_msg(xl->name, GF_LOG_INFO, 0, EC_MSG_EXTENSION_NONE,
+ "Not using any cpu extensions");
+ } else {
+ gf_msg(xl->name, GF_LOG_INFO, 0, EC_MSG_EXTENSION,
+ "Using '%s' CPU extensions", gen->name);
+ }
+ }
+
+ sys_close(file.fd);
+
+out:
+ return gen;
+}
diff --git a/xlators/cluster/ec/src/ec-code.h b/xlators/cluster/ec/src/ec-code.h
new file mode 100644
index 00000000000..75fb35d93e3
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-code.h
@@ -0,0 +1,44 @@
+/*
+ Copyright (c) 2015 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __EC_CODE_H__
+#define __EC_CODE_H__
+
+#include <glusterfs/xlator.h>
+#include <glusterfs/list.h>
+
+#include "ec-types.h"
+#include "ec-galois.h"
+
+ec_code_gen_t *
+ec_code_detect(xlator_t *xl, const char *def);
+
+ec_code_t *
+ec_code_create(ec_gf_t *gf, ec_code_gen_t *gen);
+
+void
+ec_code_destroy(ec_code_t *code);
+
+ec_code_func_linear_t
+ec_code_build_linear(ec_code_t *code, uint32_t width, uint32_t *values,
+ uint32_t count);
+ec_code_func_interleaved_t
+ec_code_build_interleaved(ec_code_t *code, uint32_t width, uint32_t *values,
+ uint32_t count);
+void
+ec_code_release(ec_code_t *code, ec_code_func_t *func);
+
+void
+ec_code_error(ec_code_builder_t *builder, int32_t error);
+
+void
+ec_code_emit(ec_code_builder_t *builder, uint8_t *bytes, uint32_t count);
+
+#endif /* __EC_CODE_H__ */
diff --git a/xlators/cluster/ec/src/ec-combine.c b/xlators/cluster/ec/src/ec-combine.c
index e84055c51e4..703a30e2485 100644
--- a/xlators/cluster/ec/src/ec-combine.c
+++ b/xlators/cluster/ec/src/ec-combine.c
@@ -11,57 +11,57 @@
#include <fnmatch.h>
#include "libxlator.h"
-#include "byte-order.h"
+#include <glusterfs/byte-order.h>
-#include "ec-data.h"
+#include "ec-types.h"
#include "ec-helpers.h"
#include "ec-common.h"
#include "ec-combine.h"
-#include "quota-common-utils.h"
+#include "ec-messages.h"
+#include <glusterfs/quota-common-utils.h>
#define EC_QUOTA_PREFIX "trusted.glusterfs.quota."
+#define EC_MISSING_DATA ((data_t *)1ULL)
+
struct _ec_dict_info;
typedef struct _ec_dict_info ec_dict_info_t;
struct _ec_dict_combine;
typedef struct _ec_dict_combine ec_dict_combine_t;
-struct _ec_dict_info
-{
- dict_t * dict;
- int32_t count;
+struct _ec_dict_info {
+ dict_t *dict;
+ int32_t count;
};
-struct _ec_dict_combine
-{
- ec_cbk_data_t * cbk;
- int32_t which;
+struct _ec_dict_combine {
+ ec_cbk_data_t *cbk;
+ int32_t which;
};
int32_t
-ec_combine_write (ec_fop_data_t *fop, ec_cbk_data_t *dst,
- ec_cbk_data_t *src)
+ec_combine_write(ec_fop_data_t *fop, ec_cbk_data_t *dst, ec_cbk_data_t *src)
{
- int valid = 0;
+ int valid = 0;
- if (!fop || !dst || !src)
- return 0;
+ if (!fop || !dst || !src)
+ return 0;
- switch (fop->id) {
+ switch (fop->id) {
case GF_FOP_REMOVEXATTR:
case GF_FOP_FREMOVEXATTR:
case GF_FOP_SETXATTR:
case GF_FOP_FSETXATTR:
- return 1;
+ return 1;
case GF_FOP_SYMLINK:
case GF_FOP_LINK:
case GF_FOP_CREATE:
case GF_FOP_MKNOD:
case GF_FOP_MKDIR:
- valid = 3;
- break;
+ valid = 3;
+ break;
case GF_FOP_UNLINK:
case GF_FOP_RMDIR:
case GF_FOP_SETATTR:
@@ -72,75 +72,129 @@ ec_combine_write (ec_fop_data_t *fop, ec_cbk_data_t *dst,
case GF_FOP_FALLOCATE:
case GF_FOP_DISCARD:
case GF_FOP_ZEROFILL:
- valid = 2;
- break;
+ valid = 2;
+ break;
case GF_FOP_RENAME:
- valid = 5;
- break;
+ valid = 5;
+ break;
default:
- gf_log_callingfn (fop->xl->name, GF_LOG_WARNING, "Invalid fop "
- "%d", fop->id);
- return 0;
- break;
- }
-
- if (!ec_iatt_combine(dst->iatt, src->iatt, valid)) {
- gf_log(fop->xl->name, GF_LOG_NOTICE, "Mismatching iatt in "
- "answers of '%s'", gf_fop_list[fop->id]);
+ gf_msg_callingfn(fop->xl->name, GF_LOG_WARNING, EINVAL,
+ EC_MSG_INVALID_FOP, "Invalid fop %d", fop->id);
+ return 0;
+ break;
+ }
- return 0;
- }
- return 1;
+ if (!ec_iatt_combine(fop, dst->iatt, src->iatt, valid)) {
+ gf_msg(fop->xl->name, GF_LOG_NOTICE, 0, EC_MSG_IATT_MISMATCH,
+ "Mismatching iatt in "
+ "answers of '%s'",
+ gf_fop_list[fop->id]);
+ return 0;
+ }
+ return 1;
}
-void ec_iatt_time_merge(uint32_t * dst_sec, uint32_t * dst_nsec,
- uint32_t src_sec, uint32_t src_nsec)
+void
+ec_iatt_time_merge(int64_t *dst_sec, uint32_t *dst_nsec, int64_t src_sec,
+ uint32_t src_nsec)
{
if ((*dst_sec < src_sec) ||
- ((*dst_sec == src_sec) && (*dst_nsec < src_nsec)))
- {
+ ((*dst_sec == src_sec) && (*dst_nsec < src_nsec))) {
*dst_sec = src_sec;
*dst_nsec = src_nsec;
}
}
-int32_t ec_iatt_combine(struct iatt * dst, struct iatt * src, int32_t count)
+static gf_boolean_t
+ec_iatt_is_trusted(ec_fop_data_t *fop, struct iatt *iatt)
{
+ uint64_t ino;
int32_t i;
- for (i = 0; i < count; i++)
- {
+ /* Only the top level fop will have fop->locks filled. */
+ while (fop->parent != NULL) {
+ fop = fop->parent;
+ }
+
+ /* Lookups are special requests always done without locks taken but they
+ * require to be able to identify differences between bricks. Special
+ * handling of these differences is already done in lookup specific code
+ * so we shouldn't ignore any difference here and consider all iatt
+ * structures as trusted. */
+ if (fop->id == GF_FOP_LOOKUP) {
+ return _gf_true;
+ }
+
+ /* Check if the iatt references an inode locked by the current fop */
+ for (i = 0; i < fop->lock_count; i++) {
+ ino = gfid_to_ino(fop->locks[i].lock->loc.inode->gfid);
+ if (iatt->ia_ino == ino) {
+ return _gf_true;
+ }
+ }
+
+ return _gf_false;
+}
+
+int32_t
+ec_iatt_combine(ec_fop_data_t *fop, struct iatt *dst, struct iatt *src,
+ int32_t count)
+{
+ int32_t i;
+ gf_boolean_t failed = _gf_false;
+
+ for (i = 0; i < count; i++) {
+ /* Check for basic fields. These fields must be equal always, even if
+ * the inode is not locked because in these cases the parent inode
+ * will be locked and differences in these fields require changes in
+ * the parent directory. */
if ((dst[i].ia_ino != src[i].ia_ino) ||
- (dst[i].ia_uid != src[i].ia_uid) ||
- (dst[i].ia_gid != src[i].ia_gid) ||
(((dst[i].ia_type == IA_IFBLK) || (dst[i].ia_type == IA_IFCHR)) &&
(dst[i].ia_rdev != src[i].ia_rdev)) ||
- ((dst[i].ia_type == IA_IFREG) &&
- (dst[i].ia_size != src[i].ia_size)) ||
- (st_mode_from_ia(dst[i].ia_prot, dst[i].ia_type) !=
- st_mode_from_ia(src[i].ia_prot, src[i].ia_type)) ||
- (gf_uuid_compare(dst[i].ia_gfid, src[i].ia_gfid) != 0))
- {
- gf_log(THIS->name, GF_LOG_WARNING,
- "Failed to combine iatt (inode: %lu-%lu, links: %u-%u, "
- "uid: %u-%u, gid: %u-%u, rdev: %lu-%lu, size: %lu-%lu, "
- "mode: %o-%o)",
+ (gf_uuid_compare(dst[i].ia_gfid, src[i].ia_gfid) != 0)) {
+ failed = _gf_true;
+ }
+ /* Check for not so stable fields. These fields can change if the
+ * inode is not locked. */
+ if (!failed && ((dst[i].ia_uid != src[i].ia_uid) ||
+ (dst[i].ia_gid != src[i].ia_gid) ||
+ (st_mode_from_ia(dst[i].ia_prot, dst[i].ia_type) !=
+ st_mode_from_ia(src[i].ia_prot, src[i].ia_type)))) {
+ if (ec_iatt_is_trusted(fop, dst)) {
+ /* If the iatt contains information from an inode that is
+ * locked, these differences are real problems, so we need to
+ * report them. Otherwise we ignore them and don't care which
+ * data is returned. */
+ failed = _gf_true;
+ } else {
+ gf_msg_debug(fop->xl->name, 0,
+ "Ignoring iatt differences because inode is not "
+ "locked");
+ }
+ }
+ if (failed) {
+ gf_msg(fop->xl->name, GF_LOG_WARNING, 0, EC_MSG_IATT_COMBINE_FAIL,
+ "Failed to combine iatt (inode: %" PRIu64 "-%" PRIu64
+ ", "
+ "links: %u-%u, uid: %u-%u, gid: %u-%u, "
+ "rdev: %" PRIu64 "-%" PRIu64 ", size: %" PRIu64 "-%" PRIu64
+ ", "
+ "mode: %o-%o), %s",
dst[i].ia_ino, src[i].ia_ino, dst[i].ia_nlink,
- src[i].ia_nlink, dst[i].ia_uid, src[i].ia_uid,
- dst[i].ia_gid, src[i].ia_gid, dst[i].ia_rdev,
- src[i].ia_rdev, dst[i].ia_size, src[i].ia_size,
+ src[i].ia_nlink, dst[i].ia_uid, src[i].ia_uid, dst[i].ia_gid,
+ src[i].ia_gid, dst[i].ia_rdev, src[i].ia_rdev,
+ dst[i].ia_size, src[i].ia_size,
st_mode_from_ia(dst[i].ia_prot, dst[i].ia_type),
- st_mode_from_ia(src[i].ia_prot, dst[i].ia_type));
+ st_mode_from_ia(src[i].ia_prot, dst[i].ia_type),
+ ec_msg_str(fop));
return 0;
}
}
- while (count-- > 0)
- {
+ while (count-- > 0) {
dst[count].ia_blocks += src[count].ia_blocks;
- if (dst[count].ia_blksize < src[count].ia_blksize)
- {
+ if (dst[count].ia_blksize < src[count].ia_blksize) {
dst[count].ia_blksize = src[count].ia_blksize;
}
@@ -155,293 +209,273 @@ int32_t ec_iatt_combine(struct iatt * dst, struct iatt * src, int32_t count)
return 1;
}
-void ec_iatt_rebuild(ec_t * ec, struct iatt * iatt, int32_t count,
- int32_t answers)
+void
+ec_iatt_rebuild(ec_t *ec, struct iatt *iatt, int32_t count, int32_t answers)
{
uint64_t blocks;
- while (count-- > 0)
- {
+ while (count-- > 0) {
blocks = iatt[count].ia_blocks * ec->fragments + answers - 1;
blocks /= answers;
iatt[count].ia_blocks = blocks;
}
}
-int32_t ec_dict_data_compare(dict_t * dict, char * key, data_t * value,
- void * arg)
+gf_boolean_t
+ec_xattr_match(dict_t *dict, char *key, data_t *value, void *arg)
{
- ec_dict_info_t * info = arg;
- data_t * data;
-
- data = dict_get(info->dict, key);
- if (data == NULL)
- {
- gf_log("ec", GF_LOG_DEBUG, "key '%s' found only on one dict", key);
-
- return -1;
+ if ((fnmatch(GF_XATTR_STIME_PATTERN, key, 0) == 0) ||
+ (strcmp(key, GET_LINK_COUNT) == 0) ||
+ (strcmp(key, GLUSTERFS_INODELK_COUNT) == 0) ||
+ (strcmp(key, GLUSTERFS_ENTRYLK_COUNT) == 0) ||
+ (strcmp(key, GLUSTERFS_OPEN_FD_COUNT) == 0)) {
+ return _gf_false;
}
- info->count--;
+ return _gf_true;
+}
+gf_boolean_t
+ec_value_ignore(char *key)
+{
if ((strcmp(key, GF_CONTENT_KEY) == 0) ||
(strcmp(key, GF_XATTR_PATHINFO_KEY) == 0) ||
(strcmp(key, GF_XATTR_USER_PATHINFO_KEY) == 0) ||
(strcmp(key, GF_XATTR_LOCKINFO_KEY) == 0) ||
(strcmp(key, GLUSTERFS_OPEN_FD_COUNT) == 0) ||
- (strncmp(key, GF_XATTR_CLRLK_CMD, strlen(GF_XATTR_CLRLK_CMD)) == 0) ||
- (strncmp(key, EC_QUOTA_PREFIX, strlen(EC_QUOTA_PREFIX)) == 0) ||
- (fnmatch(GF_XATTR_STIME_PATTERN, key, 0) == 0) ||
+ (strcmp(key, GLUSTERFS_INODELK_COUNT) == 0) ||
+ (strcmp(key, GLUSTERFS_ENTRYLK_COUNT) == 0) ||
+ (strncmp(key, GF_XATTR_CLRLK_CMD, SLEN(GF_XATTR_CLRLK_CMD)) == 0) ||
+ (strcmp(key, DHT_IATT_IN_XDATA_KEY) == 0) ||
+ (strncmp(key, EC_QUOTA_PREFIX, SLEN(EC_QUOTA_PREFIX)) == 0) ||
(fnmatch(MARKER_XATTR_PREFIX ".*." XTIME, key, 0) == 0) ||
(fnmatch(GF_XATTR_MARKER_KEY ".*", key, 0) == 0) ||
- (XATTR_IS_NODE_UUID(key)))
- {
- return 0;
- }
-
- if ((data->len != value->len) ||
- (memcmp(data->data, value->data, data->len) != 0))
- {
- gf_log("ec", GF_LOG_DEBUG, "key '%s' is different (size: %u, %u)",
- key, data->len, value->len);
-
- return -1;
+ (XATTR_IS_NODE_UUID(key))) {
+ return _gf_true;
}
- return 0;
+ return _gf_false;
}
-int32_t ec_dict_data_show(dict_t * dict, char * key, data_t * value,
- void * arg)
+int32_t
+ec_dict_compare(dict_t *dict1, dict_t *dict2)
{
- if (dict_get(arg, key) == NULL)
- {
- gf_log("ec", GF_LOG_DEBUG, "key '%s' found only on one dict", key);
- }
-
+ if (are_dicts_equal(dict1, dict2, ec_xattr_match, ec_value_ignore))
+ return 1;
return 0;
}
-int32_t ec_dict_compare(dict_t * dict1, dict_t * dict2)
+static uint32_t
+ec_dict_list(data_t **list, ec_cbk_data_t *cbk, int32_t which, char *key,
+ gf_boolean_t global)
{
- ec_dict_info_t info;
- dict_t * dict;
-
- if (dict1 != NULL)
- {
- info.dict = dict1;
- info.count = dict1->count;
- dict = dict2;
- }
- else if (dict2 != NULL)
- {
- info.dict = dict2;
- info.count = dict2->count;
- dict = dict1;
- }
- else
- {
- return 1;
- }
+ ec_t *ec = cbk->fop->xl->private;
+ ec_cbk_data_t *ans = NULL;
+ dict_t *dict = NULL;
+ data_t *data;
+ uint32_t count;
+ int32_t i;
- if (dict != NULL)
- {
- if (dict_foreach(dict, ec_dict_data_compare, &info) != 0)
- {
- return 0;
+ for (i = 0; i < ec->nodes; i++) {
+ /* We initialize the list with EC_MISSING_DATA if we are
+ * returning a global list or the current subvolume belongs
+ * to the group of the accepted answer. Note that if some
+ * subvolume is known to be down before issuing the request,
+ * we won't have any answer from it, so we set here the
+ * appropriate default value. */
+ if (global || ((cbk->mask & (1ULL << i)) != 0)) {
+ list[i] = EC_MISSING_DATA;
+ } else {
+ list[i] = NULL;
}
}
- if (info.count != 0)
- {
- dict_foreach(info.dict, ec_dict_data_show, dict);
- }
-
- return (info.count == 0);
-}
-
-int32_t ec_dict_list(data_t ** list, int32_t * count, ec_cbk_data_t * cbk,
- int32_t which, char * key)
-{
- ec_cbk_data_t * ans;
- dict_t * dict;
- int32_t i, max;
-
- max = *count;
- i = 0;
- for (ans = cbk; ans != NULL; ans = ans->next)
+ count = 0;
+ list_for_each_entry(ans, &cbk->fop->answer_list, answer_list)
{
- if (i >= max)
- {
- gf_log(cbk->fop->xl->name, GF_LOG_ERROR, "Unexpected number of "
- "dictionaries");
-
- return 0;
- }
-
- dict = (which == EC_COMBINE_XDATA) ? ans->xdata : ans->dict;
- list[i] = dict_get(dict, key);
- if (list[i] == NULL)
- {
- gf_log(cbk->fop->xl->name, GF_LOG_ERROR, "Unexpected missing "
- "dictionary entry");
-
- return 0;
+ if (global || ((cbk->mask & ans->mask) != 0)) {
+ dict = (which == EC_COMBINE_XDATA) ? ans->xdata : ans->dict;
+ data = dict_get(dict, key);
+ if (data != NULL) {
+ list[ans->idx] = data;
+ count++;
+ }
}
-
- i++;
}
- *count = i;
-
- return 1;
+ return count;
}
-char * ec_concat_prepare(xlator_t * xl, char ** sep, char ** post,
- const char * fmt, va_list args)
+int32_t
+ec_concat_prepare(xlator_t *xl, char **str, char **sep, char **post,
+ const char *fmt, va_list args)
{
- char * str, * tmp;
+ char *tmp;
int32_t len;
- len = gf_vasprintf(&str, fmt, args);
- if (len < 0)
- {
- return NULL;
+ len = gf_vasprintf(str, fmt, args);
+ if (len < 0) {
+ return -ENOMEM;
}
- tmp = strchr(str, '{');
- if (tmp == NULL)
- {
+ tmp = strchr(*str, '{');
+ if (tmp == NULL) {
goto out;
}
*tmp++ = 0;
*sep = tmp;
tmp = strchr(tmp, '}');
- if (tmp == NULL)
- {
+ if (tmp == NULL) {
goto out;
}
*tmp++ = 0;
*post = tmp;
- return str;
+ return 0;
out:
- gf_log(xl->name, GF_LOG_ERROR, "Invalid concat format");
+ gf_msg(xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_INVALID_FORMAT,
+ "Invalid concat format");
- GF_FREE(str);
+ GF_FREE(*str);
- return NULL;
+ return -EINVAL;
}
-int32_t ec_dict_data_concat(const char * fmt, ec_cbk_data_t * cbk,
- int32_t which, char * key, ...)
+static int32_t
+ec_dict_data_concat(ec_cbk_data_t *cbk, int32_t which, char *key, char *new_key,
+ const char *def, gf_boolean_t global, const char *fmt, ...)
{
- data_t * data[cbk->count];
- char * str = NULL, * pre = NULL, * sep, * post;
- dict_t * dict;
+ ec_t *ec = cbk->fop->xl->private;
+ data_t *data[ec->nodes];
+ char *str = NULL, *pre = NULL, *sep, *post;
+ dict_t *dict;
va_list args;
- int32_t i, num, len, prelen, postlen, seplen, tmp;
- int32_t ret = -1;
+ int32_t i, num, len, deflen, prelen, postlen, seplen, tmp;
+ int32_t err;
- num = cbk->count;
- if (!ec_dict_list(data, &num, cbk, which, key))
- {
- return -1;
- }
+ ec_dict_list(data, cbk, which, key, global);
- va_start(args, key);
- pre = ec_concat_prepare(cbk->fop->xl, &sep, &post, fmt, args);
+ va_start(args, fmt);
+ err = ec_concat_prepare(cbk->fop->xl, &pre, &sep, &post, fmt, args);
va_end(args);
- if (pre == NULL)
- {
- return -1;
+ if (err != 0) {
+ return err;
}
prelen = strlen(pre);
seplen = strlen(sep);
postlen = strlen(post);
- len = prelen + (num - 1) * seplen + postlen + 1;
- for (i = 0; i < num; i++)
- {
- len += data[i]->len - 1;
+ deflen = 0;
+ if (def != NULL) {
+ deflen = strlen(def);
+ }
+
+ len = prelen + postlen + 1;
+ num = -1;
+ for (i = 0; i < ec->nodes; i++) {
+ if (data[i] == NULL) {
+ continue;
+ }
+ if (data[i] == EC_MISSING_DATA) {
+ if (def == NULL) {
+ continue;
+ }
+ len += deflen;
+ } else {
+ len += data[i]->len - 1;
+ }
+ if (num >= 0) {
+ len += seplen;
+ }
+ num++;
}
+ err = -ENOMEM;
+
str = GF_MALLOC(len, gf_common_mt_char);
- if (str == NULL)
- {
+ if (str == NULL) {
goto out;
}
memcpy(str, pre, prelen);
len = prelen;
- for (i = 0; i < num; i++)
- {
- if (i > 0) {
+ for (i = 0; i < ec->nodes; i++) {
+ if (data[i] == NULL) {
+ continue;
+ }
+ if (data[i] == EC_MISSING_DATA) {
+ if (deflen == 0) {
+ continue;
+ }
+ tmp = deflen;
+ memcpy(str + len, def, tmp);
+ } else {
+ tmp = data[i]->len - 1;
+ memcpy(str + len, data[i]->data, tmp);
+ }
+ len += tmp;
+ if (i < num) {
memcpy(str + len, sep, seplen);
len += seplen;
}
- tmp = data[i]->len - 1;
- memcpy(str + len, data[i]->data, tmp);
- len += tmp;
}
memcpy(str + len, post, postlen + 1);
dict = (which == EC_COMBINE_XDATA) ? cbk->xdata : cbk->dict;
- if (dict_set_dynstr(dict, key, str) != 0)
- {
+ if (new_key) {
+ key = new_key;
+ }
+ err = dict_set_dynstr(dict, key, str);
+ if (err != 0) {
goto out;
}
str = NULL;
- ret = 0;
-
out:
GF_FREE(str);
GF_FREE(pre);
- return ret;
+ return err;
}
-int32_t ec_dict_data_merge(ec_cbk_data_t * cbk, int32_t which, char * key)
+int32_t
+ec_dict_data_merge(ec_cbk_data_t *cbk, int32_t which, char *key)
{
- data_t * data[cbk->count];
- dict_t * dict, * lockinfo, * tmp;
- char * ptr = NULL;
- int32_t i, num, len;
- int32_t ret = -1;
-
- num = cbk->count;
- if (!ec_dict_list(data, &num, cbk, which, key))
- {
- return -1;
- }
+ ec_t *ec = cbk->fop->xl->private;
+ data_t *data[ec->nodes];
+ dict_t *dict, *lockinfo, *tmp = NULL;
+ char *ptr = NULL;
+ int32_t i, len;
+ int32_t err;
+
+ ec_dict_list(data, cbk, which, key, _gf_false);
lockinfo = dict_new();
- if (lockinfo == NULL)
- {
- return -1;
+ if (lockinfo == NULL) {
+ return -ENOMEM;
}
- if (dict_unserialize(data[0]->data, data[0]->len, &lockinfo) != 0)
- {
- goto out;
- }
+ for (i = 0; i < ec->nodes; i++) {
+ if ((data[i] == NULL) || (data[i] == EC_MISSING_DATA)) {
+ continue;
+ }
- for (i = 1; i < num; i++)
- {
tmp = dict_new();
- if (tmp == NULL)
- {
+ if (tmp == NULL) {
+ err = -ENOMEM;
+
+ goto out;
+ }
+ err = dict_unserialize(data[i]->data, data[i]->len, &tmp);
+ if (err != 0) {
goto out;
}
- if ((dict_unserialize(data[i]->data, data[i]->len, &tmp) != 0) ||
- (dict_copy(tmp, lockinfo) == NULL))
- {
- dict_unref(tmp);
+ if (dict_copy(tmp, lockinfo) == NULL) {
+ err = -ENOMEM;
goto out;
}
@@ -449,157 +483,190 @@ int32_t ec_dict_data_merge(ec_cbk_data_t * cbk, int32_t which, char * key)
dict_unref(tmp);
}
- len = dict_serialized_length(lockinfo);
- if (len < 0)
- {
- goto out;
- }
- ptr = GF_MALLOC(len, gf_common_mt_char);
- if (ptr == NULL)
- {
- goto out;
- }
- if (dict_serialize(lockinfo, ptr) != 0)
- {
+ tmp = NULL;
+
+ err = dict_allocate_and_serialize(lockinfo, (char **)&ptr,
+ (unsigned int *)&len);
+ if (err != 0) {
goto out;
}
+
dict = (which == EC_COMBINE_XDATA) ? cbk->xdata : cbk->dict;
- if (dict_set_dynptr(dict, key, ptr, len) != 0)
- {
+ err = dict_set_dynptr(dict, key, ptr, len);
+ if (err != 0) {
goto out;
}
ptr = NULL;
- ret = 0;
-
out:
GF_FREE(ptr);
dict_unref(lockinfo);
+ if (tmp != NULL) {
+ dict_unref(tmp);
+ }
- return ret;
+ return err;
}
-int32_t ec_dict_data_uuid(ec_cbk_data_t * cbk, int32_t which, char * key)
+int32_t
+ec_dict_data_uuid(ec_cbk_data_t *cbk, int32_t which, char *key)
{
- ec_cbk_data_t * ans, * min;
- dict_t * src, * dst;
- data_t * data;
+ ec_cbk_data_t *ans, *min;
+ dict_t *src, *dst;
+ data_t *data;
min = cbk;
- for (ans = cbk->next; ans != NULL; ans = ans->next)
- {
- if (ans->idx < min->idx)
- {
+ for (ans = cbk->next; ans != NULL; ans = ans->next) {
+ if (ans->idx < min->idx) {
min = ans;
}
}
- if (min != cbk)
- {
+ if (min != cbk) {
src = (which == EC_COMBINE_XDATA) ? min->xdata : min->dict;
dst = (which == EC_COMBINE_XDATA) ? cbk->xdata : cbk->dict;
data = dict_get(src, key);
- if (data == NULL)
- {
- return -1;
+ if (data == NULL) {
+ return -ENOENT;
}
- if (dict_set(dst, key, data) != 0)
- {
- return -1;
+ if (dict_set(dst, key, data) != 0) {
+ return -ENOMEM;
}
}
return 0;
}
-int32_t ec_dict_data_max32(ec_cbk_data_t *cbk, int32_t which, char *key)
+int32_t
+ec_dict_data_iatt(ec_cbk_data_t *cbk, int32_t which, char *key)
{
- data_t * data[cbk->count];
- dict_t * dict;
- int32_t i, num;
- uint32_t max, tmp;
+ ec_t *ec = cbk->fop->xl->private;
+ data_t *data[ec->nodes];
+ dict_t *dict;
+ struct iatt *stbuf, *tmp;
+ int32_t i, ret;
- num = cbk->count;
- if (!ec_dict_list(data, &num, cbk, which, key))
- {
- return -1;
- }
+ ec_dict_list(data, cbk, which, key, _gf_false);
- if (num <= 1)
- {
- return 0;
+ stbuf = NULL;
+ for (i = 0; i < ec->nodes; i++) {
+ if ((data[i] == NULL) || (data[i] == EC_MISSING_DATA)) {
+ continue;
+ }
+ tmp = data_to_iatt(data[i], key);
+ if (tmp == NULL) {
+ ret = -EINVAL;
+ goto out;
+ }
+ if (stbuf == NULL) {
+ stbuf = GF_MALLOC(sizeof(struct iatt), gf_common_mt_char);
+ if (stbuf == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ *stbuf = *tmp;
+ } else {
+ if (!ec_iatt_combine(cbk->fop, stbuf, tmp, 1)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ }
}
- max = data_to_uint32(data[0]);
- for (i = 1; i < num; i++)
- {
- tmp = data_to_uint32(data[i]);
- if (max < tmp)
- {
- max = tmp;
+ if ((stbuf != NULL) && (stbuf->ia_type == IA_IFREG)) {
+ ec_iatt_rebuild(ec, stbuf, 1, cbk->count);
+ /* TODO: not sure if an iatt could come in xdata from a fop that takes
+ * no locks. */
+ if (!ec_get_inode_size(cbk->fop, cbk->fop->locks[0].lock->loc.inode,
+ &stbuf->ia_size)) {
+ ret = -EINVAL;
+ goto out;
}
}
dict = (which == EC_COMBINE_XDATA) ? cbk->xdata : cbk->dict;
- if (dict_set_uint32(dict, key, max) != 0)
- {
- return -1;
+ ret = dict_set_iatt(dict, key, stbuf, false);
+ if (ret >= 0) {
+ stbuf = NULL;
}
- return 0;
+out:
+ GF_FREE(stbuf);
+
+ return ret;
}
-int32_t ec_dict_data_max64(ec_cbk_data_t *cbk, int32_t which, char *key)
+int32_t
+ec_dict_data_max32(ec_cbk_data_t *cbk, int32_t which, char *key)
{
- data_t *data[cbk->count];
+ ec_t *ec = cbk->fop->xl->private;
+ data_t *data[ec->nodes];
dict_t *dict;
- int32_t i, num;
- uint64_t max, tmp;
+ int32_t i;
+ uint32_t max, tmp;
- num = cbk->count;
- if (!ec_dict_list(data, &num, cbk, which, key)) {
- return -1;
- }
+ ec_dict_list(data, cbk, which, key, _gf_false);
- if (num <= 1) {
- return 0;
- }
+ max = 0;
+ for (i = 0; i < ec->nodes; i++) {
+ if ((data[i] == NULL) || (data[i] == EC_MISSING_DATA)) {
+ continue;
+ }
- max = data_to_uint64(data[0]);
- for (i = 1; i < num; i++) {
- tmp = data_to_uint64(data[i]);
+ tmp = data_to_uint32(data[i]);
if (max < tmp) {
max = tmp;
}
}
dict = (which == EC_COMBINE_XDATA) ? cbk->xdata : cbk->dict;
- if (dict_set_uint64(dict, key, max) != 0) {
- return -1;
- }
-
- return 0;
+ return dict_set_uint32(dict, key, max);
}
-int32_t ec_dict_data_quota(ec_cbk_data_t *cbk, int32_t which, char *key)
+int32_t
+ec_dict_data_max64(ec_cbk_data_t *cbk, int32_t which, char *key)
{
- data_t *data[cbk->count];
- dict_t *dict = NULL;
- ec_t *ec = NULL;
- int32_t i = 0;
- int32_t num = 0;
- int32_t ret = -1;
- quota_meta_t size = {0, };
- quota_meta_t max_size = {0, };
+ ec_t *ec = cbk->fop->xl->private;
+ data_t *data[ec->nodes];
+ dict_t *dict;
+ int32_t i;
+ uint64_t max, tmp;
- num = cbk->count;
- if (!ec_dict_list(data, &num, cbk, which, key)) {
- return -1;
+ ec_dict_list(data, cbk, which, key, _gf_false);
+
+ max = 0;
+ for (i = 0; i < ec->nodes; i++) {
+ if ((data[i] == NULL) || (data[i] == EC_MISSING_DATA)) {
+ continue;
+ }
+
+ tmp = data_to_uint64(data[i]);
+ if (max < tmp) {
+ max = tmp;
+ }
}
- if (num == 0) {
+ dict = (which == EC_COMBINE_XDATA) ? cbk->xdata : cbk->dict;
+ return dict_set_uint64(dict, key, max);
+}
+
+int32_t
+ec_dict_data_quota(ec_cbk_data_t *cbk, int32_t which, char *key)
+{
+ ec_t *ec = cbk->fop->xl->private;
+ data_t *data[ec->nodes];
+ dict_t *dict = NULL;
+ int32_t i = 0;
+ quota_meta_t size = {
+ 0,
+ };
+ quota_meta_t max_size = {
+ 0,
+ };
+
+ if (ec_dict_list(data, cbk, which, key, _gf_false) == 0) {
return 0;
}
@@ -608,82 +675,83 @@ int32_t ec_dict_data_quota(ec_cbk_data_t *cbk, int32_t which, char *key)
* bricks and we can receive slightly different values. If that's the
* case, we take the maximum of all received values.
*/
- for (i = 0; i < num; i++) {
- ret = quota_data_to_meta (data[i], QUOTA_SIZE_KEY, &size);
- if (ret == -1)
- continue;
+ for (i = 0; i < ec->nodes; i++) {
+ if ((data[i] == NULL) || (data[i] == EC_MISSING_DATA) ||
+ (quota_data_to_meta(data[i], &size) < 0)) {
+ continue;
+ }
if (size.size > max_size.size)
- max_size.size = size.size;
+ max_size.size = size.size;
if (size.file_count > max_size.file_count)
- max_size.file_count = size.file_count;
+ max_size.file_count = size.file_count;
if (size.dir_count > max_size.dir_count)
- max_size.dir_count = size.dir_count;
+ max_size.dir_count = size.dir_count;
}
- ec = cbk->fop->xl->private;
max_size.size *= ec->fragments;
dict = (which == EC_COMBINE_XDATA) ? cbk->xdata : cbk->dict;
- if (quota_dict_set_meta (dict, key, &max_size, IA_IFDIR) != 0) {
- return -1;
- }
-
- return 0;
+ return quota_dict_set_meta(dict, key, &max_size, IA_IFDIR);
}
-int32_t ec_dict_data_stime(ec_cbk_data_t * cbk, int32_t which, char * key)
+int32_t
+ec_dict_data_stime(ec_cbk_data_t *cbk, int32_t which, char *key)
{
- data_t * data[cbk->count];
- dict_t * dict;
- int32_t i, num;
+ ec_t *ec = cbk->fop->xl->private;
+ data_t *data[ec->nodes];
+ dict_t *dict;
+ int32_t i, err;
- num = cbk->count;
- if (!ec_dict_list(data, &num, cbk, which, key))
- {
- return -1;
- }
+ ec_dict_list(data, cbk, which, key, _gf_false);
dict = (which == EC_COMBINE_XDATA) ? cbk->xdata : cbk->dict;
- for (i = 1; i < num; i++)
- {
- if (gf_get_max_stime(cbk->fop->xl, dict, key, data[i]) != 0)
- {
- gf_log(cbk->fop->xl->name, GF_LOG_ERROR, "STIME combination "
- "failed");
+ for (i = 0; i < ec->nodes; i++) {
+ if ((data[i] == NULL) || (data[i] == EC_MISSING_DATA)) {
+ continue;
+ }
+ err = gf_get_max_stime(cbk->fop->xl, dict, key, data[i]);
+ if (err != 0) {
+ gf_msg(cbk->fop->xl->name, GF_LOG_ERROR, -err,
+ EC_MSG_STIME_COMBINE_FAIL, "STIME combination failed");
- return -1;
+ return err;
}
}
return 0;
}
-int32_t ec_dict_data_combine(dict_t * dict, char * key, data_t * value,
- void * arg)
+int32_t
+ec_dict_data_combine(dict_t *dict, char *key, data_t *value, void *arg)
{
- ec_dict_combine_t * data = arg;
+ ec_dict_combine_t *data = arg;
if ((strcmp(key, GF_XATTR_PATHINFO_KEY) == 0) ||
- (strcmp(key, GF_XATTR_USER_PATHINFO_KEY) == 0))
- {
- return ec_dict_data_concat("(<EC:%s> { })", data->cbk, data->which,
- key, data->cbk->fop->xl->name);
+ (strcmp(key, GF_XATTR_USER_PATHINFO_KEY) == 0)) {
+ return ec_dict_data_concat(data->cbk, data->which, key, NULL, NULL,
+ _gf_false, _gf_false, "(<EC:%s> { })",
+ data->cbk->fop->xl->name);
}
- if (strncmp(key, GF_XATTR_CLRLK_CMD, strlen(GF_XATTR_CLRLK_CMD)) == 0)
- {
- return ec_dict_data_concat("{\n}", data->cbk, data->which, key);
+ if (strncmp(key, GF_XATTR_CLRLK_CMD, SLEN(GF_XATTR_CLRLK_CMD)) == 0) {
+ return ec_dict_data_concat(data->cbk, data->which, key, NULL, NULL,
+ _gf_false, "{\n}");
}
- if (strncmp(key, GF_XATTR_LOCKINFO_KEY,
- strlen(GF_XATTR_LOCKINFO_KEY)) == 0)
- {
+ if (strncmp(key, GF_XATTR_LOCKINFO_KEY, SLEN(GF_XATTR_LOCKINFO_KEY)) == 0) {
return ec_dict_data_merge(data->cbk, data->which, key);
}
- if (strcmp(key, GLUSTERFS_OPEN_FD_COUNT) == 0)
- {
+ if (strcmp(key, GET_LINK_COUNT) == 0) {
+ return ec_dict_data_max32(data->cbk, data->which, key);
+ }
+
+ if (strcmp(key, GLUSTERFS_OPEN_FD_COUNT) == 0) {
+ return ec_dict_data_max32(data->cbk, data->which, key);
+ }
+ if ((strcmp(key, GLUSTERFS_INODELK_COUNT) == 0) ||
+ (strcmp(key, GLUSTERFS_ENTRYLK_COUNT) == 0)) {
return ec_dict_data_max32(data->cbk, data->which, key);
}
@@ -691,17 +759,22 @@ int32_t ec_dict_data_combine(dict_t * dict, char * key, data_t * value,
return ec_dict_data_quota(data->cbk, data->which, key);
}
/* Ignore all other quota attributes */
- if (strncmp(key, EC_QUOTA_PREFIX, strlen(EC_QUOTA_PREFIX)) == 0) {
+ if (strncmp(key, EC_QUOTA_PREFIX, SLEN(EC_QUOTA_PREFIX)) == 0) {
return 0;
}
- if (XATTR_IS_NODE_UUID(key))
- {
- return ec_dict_data_uuid(data->cbk, data->which, key);
+ if (XATTR_IS_NODE_UUID(key)) {
+ if (data->cbk->fop->int32) {
+ /* List of node uuid is requested */
+ return ec_dict_data_concat(data->cbk, data->which, key,
+ GF_XATTR_LIST_NODE_UUIDS_KEY, UUID0_STR,
+ _gf_true, "{ }");
+ } else {
+ return ec_dict_data_uuid(data->cbk, data->which, key);
+ }
}
- if (fnmatch(GF_XATTR_STIME_PATTERN, key, FNM_NOESCAPE) == 0)
- {
+ if (fnmatch(GF_XATTR_STIME_PATTERN, key, FNM_NOESCAPE) == 0) {
return ec_dict_data_stime(data->cbk, data->which, key);
}
@@ -709,71 +782,74 @@ int32_t ec_dict_data_combine(dict_t * dict, char * key, data_t * value,
return ec_dict_data_max64(data->cbk, data->which, key);
}
+ if (strcmp(key, GF_PRESTAT) == 0 || strcmp(key, GF_POSTSTAT) == 0) {
+ return ec_dict_data_iatt(data->cbk, data->which, key);
+ }
+
return 0;
}
-int32_t ec_dict_combine(ec_cbk_data_t * cbk, int32_t which)
+int32_t
+ec_dict_combine(ec_cbk_data_t *cbk, int32_t which)
{
- dict_t * dict;
+ dict_t *dict = NULL;
ec_dict_combine_t data;
+ int32_t err = 0;
data.cbk = cbk;
data.which = which;
dict = (which == EC_COMBINE_XDATA) ? cbk->xdata : cbk->dict;
- if ((dict != NULL) &&
- (dict_foreach(dict, ec_dict_data_combine, &data) != 0))
- {
- gf_log(cbk->fop->xl->name, GF_LOG_ERROR, "Dictionary combination "
- "failed");
+ if (dict != NULL) {
+ err = dict_foreach(dict, ec_dict_data_combine, &data);
+ if (err != 0) {
+ gf_msg(cbk->fop->xl->name, GF_LOG_ERROR, -err,
+ EC_MSG_DICT_COMBINE_FAIL, "Dictionary combination failed");
- return 0;
+ return err;
+ }
}
- return 1;
+ return 0;
}
-int32_t ec_vector_compare(struct iovec * dst_vector, int32_t dst_count,
- struct iovec * src_vector, int32_t src_count)
+int32_t
+ec_vector_compare(struct iovec *dst_vector, int32_t dst_count,
+ struct iovec *src_vector, int32_t src_count)
{
int32_t dst_size = 0, src_size = 0;
- if (dst_count > 0)
- {
+ if (dst_count > 0) {
dst_size = iov_length(dst_vector, dst_count);
}
- if (src_count > 0)
- {
+ if (src_count > 0) {
src_size = iov_length(src_vector, src_count);
}
return (dst_size == src_size);
}
-int32_t ec_flock_compare(struct gf_flock * dst, struct gf_flock * src)
+int32_t
+ec_flock_compare(struct gf_flock *dst, struct gf_flock *src)
{
- if ((dst->l_type != src->l_type) ||
- (dst->l_whence != src->l_whence) ||
- (dst->l_start != src->l_start) ||
- (dst->l_len != src->l_len) ||
+ if ((dst->l_type != src->l_type) || (dst->l_whence != src->l_whence) ||
+ (dst->l_start != src->l_start) || (dst->l_len != src->l_len) ||
(dst->l_pid != src->l_pid) ||
- !is_same_lkowner(&dst->l_owner, &src->l_owner))
- {
+ !is_same_lkowner(&dst->l_owner, &src->l_owner)) {
return 0;
}
return 1;
}
-void ec_statvfs_combine(struct statvfs * dst, struct statvfs * src)
+void
+ec_statvfs_combine(struct statvfs *dst, struct statvfs *src)
{
- if (dst->f_bsize < src->f_bsize)
- {
+ if (dst->f_bsize < src->f_bsize) {
dst->f_bsize = src->f_bsize;
}
- if (dst->f_frsize < src->f_frsize)
- {
+ if (dst->f_frsize < src->f_frsize) {
dst->f_blocks *= dst->f_frsize;
dst->f_blocks /= src->f_frsize;
@@ -784,9 +860,7 @@ void ec_statvfs_combine(struct statvfs * dst, struct statvfs * src)
dst->f_bavail /= src->f_frsize;
dst->f_frsize = src->f_frsize;
- }
- else if (dst->f_frsize > src->f_frsize)
- {
+ } else if (dst->f_frsize > src->f_frsize) {
src->f_blocks *= src->f_frsize;
src->f_blocks /= dst->f_frsize;
@@ -796,111 +870,102 @@ void ec_statvfs_combine(struct statvfs * dst, struct statvfs * src)
src->f_bavail *= src->f_frsize;
src->f_bavail /= dst->f_frsize;
}
- if (dst->f_blocks > src->f_blocks)
- {
+ if (dst->f_blocks > src->f_blocks) {
dst->f_blocks = src->f_blocks;
}
- if (dst->f_bfree > src->f_bfree)
- {
+ if (dst->f_bfree > src->f_bfree) {
dst->f_bfree = src->f_bfree;
}
- if (dst->f_bavail > src->f_bavail)
- {
+ if (dst->f_bavail > src->f_bavail) {
dst->f_bavail = src->f_bavail;
}
- if (dst->f_files < src->f_files)
- {
+ if (dst->f_files < src->f_files) {
dst->f_files = src->f_files;
}
- if (dst->f_ffree > src->f_ffree)
- {
+ if (dst->f_ffree > src->f_ffree) {
dst->f_ffree = src->f_ffree;
}
- if (dst->f_favail > src->f_favail)
- {
+ if (dst->f_favail > src->f_favail) {
dst->f_favail = src->f_favail;
}
- if (dst->f_namemax > src->f_namemax)
- {
+ if (dst->f_namemax > src->f_namemax) {
dst->f_namemax = src->f_namemax;
}
- if (dst->f_flag != src->f_flag)
- {
- gf_log(THIS->name, GF_LOG_DEBUG, "Mismatching file system flags "
- "(%lX, %lX)",
- dst->f_flag, src->f_flag);
+ if (dst->f_flag != src->f_flag) {
+ gf_msg_debug(THIS->name, 0,
+ "Mismatching file system flags "
+ "(%lX, %lX)",
+ dst->f_flag, src->f_flag);
}
dst->f_flag &= src->f_flag;
}
-int32_t ec_combine_check(ec_cbk_data_t * dst, ec_cbk_data_t * src,
- ec_combine_f combine)
+int32_t
+ec_combine_check(ec_cbk_data_t *dst, ec_cbk_data_t *src, ec_combine_f combine)
{
- ec_fop_data_t * fop = dst->fop;
+ ec_fop_data_t *fop = dst->fop;
- if (dst->op_ret != src->op_ret)
- {
- gf_log(fop->xl->name, GF_LOG_DEBUG, "Mismatching return code in "
- "answers of '%s': %d <-> %d",
- ec_fop_name(fop->id), dst->op_ret, src->op_ret);
+ if (dst->op_ret != src->op_ret) {
+ gf_msg_debug(fop->xl->name, 0,
+ "Mismatching return code in "
+ "answers of '%s': %d <-> %d",
+ ec_fop_name(fop->id), dst->op_ret, src->op_ret);
return 0;
}
- if (dst->op_ret < 0)
- {
- if (dst->op_errno != src->op_errno)
- {
- gf_log(fop->xl->name, GF_LOG_DEBUG, "Mismatching errno code in "
- "answers of '%s': %d <-> %d",
- ec_fop_name(fop->id), dst->op_errno, src->op_errno);
+ if (dst->op_ret < 0) {
+ if (dst->op_errno != src->op_errno) {
+ gf_msg_debug(fop->xl->name, 0,
+ "Mismatching errno code in "
+ "answers of '%s': %d <-> %d",
+ ec_fop_name(fop->id), dst->op_errno, src->op_errno);
return 0;
}
}
- if (!ec_dict_compare(dst->xdata, src->xdata))
- {
- gf_log(fop->xl->name, GF_LOG_WARNING, "Mismatching xdata in answers "
- "of '%s'",
+ if (!ec_dict_compare(dst->xdata, src->xdata)) {
+ gf_msg(fop->xl->name, GF_LOG_DEBUG, 0, EC_MSG_XDATA_MISMATCH,
+ "Mismatching xdata in answers "
+ "of '%s'",
ec_fop_name(fop->id));
return 0;
}
- if ((dst->op_ret >= 0) && (combine != NULL))
- {
+ if ((dst->op_ret >= 0) && (combine != NULL)) {
return combine(fop, dst, src);
}
return 1;
}
-void ec_combine (ec_cbk_data_t *newcbk, ec_combine_f combine)
+void
+ec_combine(ec_cbk_data_t *newcbk, ec_combine_f combine)
{
ec_fop_data_t *fop = newcbk->fop;
ec_cbk_data_t *cbk = NULL, *tmp = NULL;
struct list_head *item = NULL;
- int32_t needed = 0, resume = 0;
+ int32_t needed = 0;
char str[32];
LOCK(&fop->lock);
+ fop->received |= newcbk->mask;
+
item = fop->cbk_list.prev;
list_for_each_entry(cbk, &fop->cbk_list, list)
{
- if (ec_combine_check(newcbk, cbk, combine))
- {
+ if (ec_combine_check(newcbk, cbk, combine)) {
newcbk->count += cbk->count;
newcbk->mask |= cbk->mask;
item = cbk->list.prev;
- while (item != &fop->cbk_list)
- {
+ while (item != &fop->cbk_list) {
tmp = list_entry(item, ec_cbk_data_t, list);
- if (tmp->count >= newcbk->count)
- {
+ if (tmp->count >= newcbk->count) {
break;
}
item = item->prev;
@@ -917,22 +982,14 @@ void ec_combine (ec_cbk_data_t *newcbk, ec_combine_f combine)
ec_trace("ANSWER", fop, "combine=%s[%d]",
ec_bin(str, sizeof(str), newcbk->mask, 0), newcbk->count);
- if ((newcbk->count == fop->expected) && (fop->answer == NULL)) {
- fop->answer = newcbk;
-
- resume = 1;
- }
-
cbk = list_entry(fop->cbk_list.next, ec_cbk_data_t, list);
- needed = fop->minimum - cbk->count - fop->winds + 1;
+ if ((fop->mask ^ fop->remaining) == fop->received) {
+ needed = fop->minimum - cbk->count;
+ }
UNLOCK(&fop->lock);
if (needed > 0) {
ec_dispatch_next(fop, newcbk->idx);
- } else if (resume) {
- ec_update_bad(fop, newcbk->mask);
-
- ec_resume(fop, 0);
}
}
diff --git a/xlators/cluster/ec/src/ec-combine.h b/xlators/cluster/ec/src/ec-combine.h
index cae2bb9274f..1010cc3be26 100644
--- a/xlators/cluster/ec/src/ec-combine.h
+++ b/xlators/cluster/ec/src/ec-combine.h
@@ -11,27 +11,34 @@
#ifndef __EC_COMBINE_H__
#define __EC_COMBINE_H__
-#define EC_COMBINE_DICT 0
+#define EC_COMBINE_DICT 0
#define EC_COMBINE_XDATA 1
-typedef int32_t (* ec_combine_f)(ec_fop_data_t * fop, ec_cbk_data_t * dst,
- ec_cbk_data_t * src);
+typedef int32_t (*ec_combine_f)(ec_fop_data_t *fop, ec_cbk_data_t *dst,
+ ec_cbk_data_t *src);
-void ec_iatt_rebuild(ec_t * ec, struct iatt * iatt, int32_t count,
- int32_t answers);
+void
+ec_iatt_rebuild(ec_t *ec, struct iatt *iatt, int32_t count, int32_t answers);
-int32_t ec_iatt_combine(struct iatt * dst, struct iatt * src, int32_t count);
-int32_t ec_dict_compare(dict_t * dict1, dict_t * dict2);
-int32_t ec_vector_compare(struct iovec * dst_vector, int32_t dst_count,
- struct iovec * src_vector, int32_t src_count);
-int32_t ec_flock_compare(struct gf_flock * dst, struct gf_flock * src);
-void ec_statvfs_combine(struct statvfs * dst, struct statvfs * src);
+int32_t
+ec_iatt_combine(ec_fop_data_t *fop, struct iatt *dst, struct iatt *src,
+ int32_t count);
+int32_t
+ec_dict_compare(dict_t *dict1, dict_t *dict2);
+int32_t
+ec_vector_compare(struct iovec *dst_vector, int32_t dst_count,
+ struct iovec *src_vector, int32_t src_count);
+int32_t
+ec_flock_compare(struct gf_flock *dst, struct gf_flock *src);
+void
+ec_statvfs_combine(struct statvfs *dst, struct statvfs *src);
-int32_t ec_dict_combine(ec_cbk_data_t * cbk, int32_t which);
+int32_t
+ec_dict_combine(ec_cbk_data_t *cbk, int32_t which);
-void ec_combine(ec_cbk_data_t * cbk, ec_combine_f combine);
+void
+ec_combine(ec_cbk_data_t *cbk, ec_combine_f combine);
int32_t
-ec_combine_write (ec_fop_data_t *fop, ec_cbk_data_t *dst,
- ec_cbk_data_t *src);
+ec_combine_write(ec_fop_data_t *fop, ec_cbk_data_t *dst, ec_cbk_data_t *src);
#endif /* __EC_COMBINE_H__ */
diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
index a4bd8dafe28..b955efd8c2d 100644
--- a/xlators/cluster/ec/src/ec-common.c
+++ b/xlators/cluster/ec/src/ec-common.c
@@ -8,222 +8,382 @@
cases as published by the Free Software Foundation.
*/
-#include "byte-order.h"
+#include <glusterfs/byte-order.h>
+#include <glusterfs/hashfn.h>
#include "ec-mem-types.h"
-#include "ec-data.h"
+#include "ec-types.h"
#include "ec-helpers.h"
#include "ec-combine.h"
#include "ec-common.h"
#include "ec-fops.h"
#include "ec-method.h"
#include "ec.h"
+#include "ec-messages.h"
-int32_t ec_child_valid(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+#define EC_INVALID_INDEX UINT32_MAX
+
+void
+ec_update_fd_status(fd_t *fd, xlator_t *xl, int idx, int32_t ret_status)
{
- return (idx < ec->nodes) && (((fop->remaining >> idx) & 1) == 1);
+ ec_fd_t *fd_ctx;
+
+ if (fd == NULL)
+ return;
+
+ LOCK(&fd->lock);
+ {
+ fd_ctx = __ec_fd_get(fd, xl);
+ if (fd_ctx) {
+ if (ret_status >= 0)
+ fd_ctx->fd_status[idx] = EC_FD_OPENED;
+ else
+ fd_ctx->fd_status[idx] = EC_FD_NOT_OPENED;
+ }
+ }
+ UNLOCK(&fd->lock);
}
-int32_t ec_child_next(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+static uintptr_t
+ec_fd_ctx_need_open(fd_t *fd, xlator_t *this, uintptr_t mask)
{
- while (!ec_child_valid(ec, fop, idx))
+ int i = 0;
+ int count = 0;
+ ec_t *ec = NULL;
+ ec_fd_t *fd_ctx = NULL;
+ uintptr_t need_open = 0;
+
+ ec = this->private;
+
+ fd_ctx = ec_fd_get(fd, this);
+ if (!fd_ctx)
+ return count;
+
+ LOCK(&fd->lock);
{
- if (++idx >= ec->nodes)
- {
- idx = 0;
- }
- if (idx == fop->first)
- {
- return -1;
+ for (i = 0; i < ec->nodes; i++) {
+ if ((fd_ctx->fd_status[i] == EC_FD_NOT_OPENED) &&
+ ((ec->xl_up & (1 << i)) != 0) && ((mask & (1 << i)) != 0)) {
+ fd_ctx->fd_status[i] = EC_FD_OPENING;
+ need_open |= (1 << i);
+ count++;
+ }
}
}
+ UNLOCK(&fd->lock);
- return idx;
+ /* If fd needs to open on minimum number of nodes
+ * then ignore fixing the fd as it has been
+ * requested from heal operation.
+ */
+ if (count >= ec->fragments) {
+ need_open = 0;
+ }
+
+ return need_open;
}
-uintptr_t ec_inode_good(inode_t * inode, xlator_t * xl)
+static gf_boolean_t
+ec_is_fd_fixable(fd_t *fd)
{
- ec_inode_t * ctx;
- uintptr_t bad = 0;
+ if (!fd || !fd->inode)
+ return _gf_false;
+ else if (fd_is_anonymous(fd))
+ return _gf_false;
+ else if (gf_uuid_is_null(fd->inode->gfid))
+ return _gf_false;
- ctx = ec_inode_get(inode, xl);
- if (ctx != NULL)
- {
- bad = ctx->bad;
+ return _gf_true;
+}
+
+static void
+ec_fix_open(ec_fop_data_t *fop, uintptr_t mask)
+{
+ uintptr_t need_open = 0;
+ int ret = 0;
+ int32_t flags = 0;
+ loc_t loc = {
+ 0,
+ };
+
+ if (!ec_is_fd_fixable(fop->fd))
+ goto out;
+
+ /* Evaluate how many remote fd's to be opened */
+ need_open = ec_fd_ctx_need_open(fop->fd, fop->xl, mask);
+ if (need_open == 0) {
+ goto out;
+ }
+
+ loc.inode = inode_ref(fop->fd->inode);
+ gf_uuid_copy(loc.gfid, fop->fd->inode->gfid);
+ ret = loc_path(&loc, NULL);
+ if (ret < 0) {
+ goto out;
+ }
+
+ flags = fop->fd->flags & (~(O_TRUNC | O_APPEND | O_CREAT | O_EXCL));
+ if (IA_IFDIR == fop->fd->inode->ia_type) {
+ ec_opendir(fop->frame, fop->xl, need_open,
+ EC_MINIMUM_ONE | EC_FOP_NO_PROPAGATE_ERROR, NULL, NULL,
+ &fop->loc[0], fop->fd, NULL);
+ } else {
+ ec_open(fop->frame, fop->xl, need_open,
+ EC_MINIMUM_ONE | EC_FOP_NO_PROPAGATE_ERROR, NULL, NULL, &loc,
+ flags, fop->fd, NULL);
}
- return ~bad;
+out:
+ loc_wipe(&loc);
}
-uintptr_t ec_fd_good(fd_t * fd, xlator_t * xl)
+static off_t
+ec_range_end_get(off_t fl_start, uint64_t fl_size)
{
- ec_fd_t * ctx;
- uintptr_t bad = 0;
-
- ctx = ec_fd_get(fd, xl);
- if (ctx != NULL)
- {
- bad = ctx->bad;
+ if (fl_size > 0) {
+ if (fl_size >= EC_RANGE_FULL) {
+ /* Infinity */
+ fl_start = LLONG_MAX;
+ } else {
+ fl_start += fl_size - 1;
+ if (fl_start < 0) {
+ /* Overflow */
+ fl_start = LLONG_MAX;
+ }
+ }
}
- return ~bad;
+ return fl_start;
}
-uintptr_t ec_update_inode(ec_fop_data_t * fop, inode_t * inode, uintptr_t good,
- uintptr_t bad)
+static gf_boolean_t
+ec_is_range_conflict(ec_lock_link_t *l1, ec_lock_link_t *l2)
{
- ec_inode_t * ctx = NULL;
+ return ((l1->fl_end >= l2->fl_start) && (l2->fl_end >= l1->fl_start));
+}
- if (inode != NULL)
- {
- LOCK(&inode->lock);
+static gf_boolean_t
+ec_lock_conflict(ec_lock_link_t *l1, ec_lock_link_t *l2)
+{
+ ec_t *ec = l1->fop->xl->private;
- ctx = __ec_inode_get(inode, fop->xl);
- if (ctx != NULL)
- {
- ctx->bad &= ~good;
- bad |= ctx->bad;
- ctx->bad = bad;
- }
+ /* Fops like access/stat won't have to worry what the other fops are
+ * modifying as the fop is wound only to one brick. So it can be
+ * executed in parallel*/
+ if (l1->fop->minimum == EC_MINIMUM_ONE ||
+ l2->fop->minimum == EC_MINIMUM_ONE)
+ return _gf_false;
+
+ if ((l1->fop->flags & EC_FLAG_LOCK_SHARED) &&
+ (l2->fop->flags & EC_FLAG_LOCK_SHARED))
+ return _gf_false;
- UNLOCK(&inode->lock);
+ if (!ec->parallel_writes) {
+ return _gf_true;
}
- return bad;
+ return ec_is_range_conflict(l1, l2);
}
-uintptr_t ec_update_fd(ec_fop_data_t * fop, fd_t * fd, uintptr_t good,
- uintptr_t bad)
+uint32_t
+ec_select_first_by_read_policy(ec_t *ec, ec_fop_data_t *fop)
{
- ec_fd_t * ctx = NULL;
+ if (ec->read_policy == EC_ROUND_ROBIN) {
+ return ec->idx;
+ } else if (ec->read_policy == EC_GFID_HASH) {
+ if (fop->use_fd) {
+ return SuperFastHash((char *)fop->fd->inode->gfid,
+ sizeof(fop->fd->inode->gfid)) %
+ ec->nodes;
+ } else {
+ if (gf_uuid_is_null(fop->loc[0].gfid))
+ loc_gfid(&fop->loc[0], fop->loc[0].gfid);
+ return SuperFastHash((char *)fop->loc[0].gfid,
+ sizeof(fop->loc[0].gfid)) %
+ ec->nodes;
+ }
+ }
+ return 0;
+}
- LOCK(&fd->lock);
+static gf_boolean_t
+ec_child_valid(ec_t *ec, ec_fop_data_t *fop, uint32_t idx)
+{
+ return (idx < ec->nodes) && (((fop->remaining >> idx) & 1) == 1);
+}
- ctx = __ec_fd_get(fd, fop->xl);
- if (ctx != NULL)
- {
- ctx->bad &= ~good;
- bad |= ctx->bad;
- ctx->bad = bad;
+static uint32_t
+ec_child_next(ec_t *ec, ec_fop_data_t *fop, uint32_t idx)
+{
+ while (!ec_child_valid(ec, fop, idx)) {
+ if (++idx >= ec->nodes) {
+ idx = 0;
+ }
+ if (idx == fop->first) {
+ return EC_INVALID_INDEX;
+ }
}
- UNLOCK(&fd->lock);
-
- return bad;
+ return idx;
}
-int32_t ec_heal_report(call_frame_t * frame, void * cookie, xlator_t * this,
- int32_t op_ret, int32_t op_errno, uintptr_t mask,
- uintptr_t good, uintptr_t bad, dict_t * xdata)
+int32_t
+ec_heal_report(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, uintptr_t mask, uintptr_t good,
+ uintptr_t bad, uint32_t pending, dict_t *xdata)
{
if (op_ret < 0) {
- gf_log(this->name, GF_LOG_WARNING, "Heal failed (error %d)",
- op_errno);
+ gf_msg(this->name, GF_LOG_DEBUG, op_errno, EC_MSG_HEAL_FAIL,
+ "Heal failed");
} else {
if ((mask & ~good) != 0) {
- gf_log(this->name, GF_LOG_INFO, "Heal succeeded on %d/%d "
- "subvolumes",
- ec_bits_count(mask & ~(good | bad)),
- ec_bits_count(mask & ~good));
+ gf_msg(this->name, GF_LOG_DEBUG, 0, EC_MSG_HEAL_SUCCESS,
+ "Heal succeeded on %d/%d "
+ "subvolumes",
+ gf_bits_count(mask & ~(good | bad)),
+ gf_bits_count(mask & ~good));
}
}
return 0;
}
-int32_t ec_fop_needs_heal(ec_fop_data_t *fop)
+static uintptr_t
+ec_fop_needs_name_heal(ec_fop_data_t *fop)
+{
+ ec_t *ec = NULL;
+ ec_cbk_data_t *cbk = NULL;
+ ec_cbk_data_t *enoent_cbk = NULL;
+
+ ec = fop->xl->private;
+ if (fop->id != GF_FOP_LOOKUP)
+ return 0;
+
+ if (!fop->loc[0].name || strlen(fop->loc[0].name) == 0)
+ return 0;
+
+ list_for_each_entry(cbk, &fop->cbk_list, list)
+ {
+ if (cbk->op_ret < 0 && cbk->op_errno == ENOENT) {
+ enoent_cbk = cbk;
+ break;
+ }
+ }
+
+ if (!enoent_cbk)
+ return 0;
+
+ return ec->xl_up & ~enoent_cbk->mask;
+}
+
+int32_t
+ec_fop_needs_heal(ec_fop_data_t *fop)
{
ec_t *ec = fop->xl->private;
+ if (fop->lock_count == 0) {
+ /*
+ * if fop->lock_count is zero that means it saw version mismatch
+ * without any locks so it can't be trusted. If we launch a heal
+ * based on this it will lead to INODELKs which will affect I/O
+ * performance. Considering self-heal-daemon and operations on
+ * the inode from client which take locks can still trigger the
+ * heal we can choose to not attempt a heal when fop->lock_count
+ * is zero.
+ */
+ return 0;
+ }
return (ec->xl_up & ~(fop->remaining | fop->good)) != 0;
}
-void ec_check_status(ec_fop_data_t * fop)
+void
+ec_check_status(ec_fop_data_t *fop)
{
- ec_t * ec = fop->xl->private;
+ ec_t *ec = fop->xl->private;
int32_t partial = 0;
+ char str1[32], str2[32], str3[32], str4[32], str5[32];
- if (fop->answer->op_ret >= 0) {
- if ((fop->id == GF_FOP_LOOKUP) ||
- (fop->id == GF_FOP_STAT) || (fop->id == GF_FOP_FSTAT)) {
+ if (!ec_fop_needs_name_heal(fop) && !ec_fop_needs_heal(fop)) {
+ return;
+ }
+
+ if (fop->answer && fop->answer->op_ret >= 0) {
+ if ((fop->id == GF_FOP_LOOKUP) || (fop->id == GF_FOP_STAT) ||
+ (fop->id == GF_FOP_FSTAT)) {
partial = fop->answer->iatt[0].ia_type == IA_IFDIR;
} else if (fop->id == GF_FOP_OPENDIR) {
partial = 1;
}
}
- if (!ec_fop_needs_heal(fop)) {
- return;
- }
-
- gf_log(fop->xl->name, GF_LOG_WARNING, "Operation failed on some "
- "subvolumes (up=%lX, mask=%lX, "
- "remaining=%lX, good=%lX, bad=%lX)",
- ec->xl_up, fop->mask, fop->remaining, fop->good, fop->bad);
-
- if (fop->use_fd)
- {
+ gf_msg(
+ fop->xl->name, GF_LOG_WARNING, 0, EC_MSG_OP_FAIL_ON_SUBVOLS,
+ "Operation failed on %d of %d subvolumes.(up=%s, mask=%s, "
+ "remaining=%s, good=%s, bad=%s,"
+ "(Least significant bit represents first client/brick of subvol), %s)",
+ gf_bits_count(ec->xl_up & ~(fop->remaining | fop->good)), ec->nodes,
+ ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
+ ec_bin(str2, sizeof(str2), fop->mask, ec->nodes),
+ ec_bin(str3, sizeof(str3), fop->remaining, ec->nodes),
+ ec_bin(str4, sizeof(str4), fop->good, ec->nodes),
+ ec_bin(str5, sizeof(str5), ec->xl_up & ~(fop->remaining | fop->good),
+ ec->nodes),
+ ec_msg_str(fop));
+ if (fop->use_fd) {
if (fop->fd != NULL) {
ec_fheal(NULL, fop->xl, -1, EC_MINIMUM_ONE, ec_heal_report, NULL,
fop->fd, partial, NULL);
}
- }
- else
- {
+ } else {
ec_heal(NULL, fop->xl, -1, EC_MINIMUM_ONE, ec_heal_report, NULL,
&fop->loc[0], partial, NULL);
- if (fop->loc[1].inode != NULL)
- {
+ if (fop->loc[1].inode != NULL) {
ec_heal(NULL, fop->xl, -1, EC_MINIMUM_ONE, ec_heal_report, NULL,
&fop->loc[1], partial, NULL);
}
}
}
-void ec_update_bad(ec_fop_data_t * fop, uintptr_t good)
+void
+ec_update_good(ec_fop_data_t *fop, uintptr_t good)
{
- ec_t *ec = fop->xl->private;
- uintptr_t bad;
-
- bad = ec->xl_up & ~(fop->remaining | good);
- fop->bad |= bad;
- fop->good |= good;
-
- if (fop->parent == NULL)
- {
- if ((fop->flags & EC_FLAG_UPDATE_LOC_PARENT) != 0)
- {
- ec_update_inode(fop, fop->loc[0].parent, good, bad);
- }
- if ((fop->flags & EC_FLAG_UPDATE_LOC_INODE) != 0)
- {
- ec_update_inode(fop, fop->loc[0].inode, good, bad);
- }
- ec_update_inode(fop, fop->loc[1].inode, good, bad);
- if ((fop->flags & EC_FLAG_UPDATE_FD_INODE) != 0)
- {
- ec_update_inode(fop, fop->fd->inode, good, bad);
- }
- if ((fop->flags & EC_FLAG_UPDATE_FD) != 0)
- {
- ec_update_fd(fop, fop->fd, good, bad);
- }
+ fop->good = good;
+ /* Fops that are executed only on one brick do not have enough information
+ * to decide if healing is needed or not. */
+ if ((fop->expected != 1) && (fop->parent == NULL)) {
ec_check_status(fop);
}
}
+void
+ec_lock_update_good(ec_lock_t *lock, ec_fop_data_t *fop)
+{
+ /* Fops that are executed only on one brick do not have enough information
+ * to update the global mask of good bricks. */
+ if (fop->expected == 1) {
+ return;
+ }
+
+ /* When updating the good mask of the lock, we only take into consideration
+ * those bits corresponding to the bricks where the fop has been executed.
+ * Bad bricks are removed from good_mask, but once marked as bad it's never
+ * set to good until the lock is released and reacquired */
+
+ lock->good_mask &= fop->good | fop->remaining;
+}
-void __ec_fop_set_error(ec_fop_data_t * fop, int32_t error)
+void
+__ec_fop_set_error(ec_fop_data_t *fop, int32_t error)
{
- if ((error != 0) && (fop->error == 0))
- {
+ if ((error != 0) && (fop->error == 0)) {
fop->error = error;
}
}
-void ec_fop_set_error(ec_fop_data_t * fop, int32_t error)
+void
+ec_fop_set_error(ec_fop_data_t *fop, int32_t error)
{
LOCK(&fop->lock);
@@ -232,17 +392,63 @@ void ec_fop_set_error(ec_fop_data_t * fop, int32_t error)
UNLOCK(&fop->lock);
}
-void ec_sleep(ec_fop_data_t *fop)
+gf_boolean_t
+ec_cbk_set_error(ec_cbk_data_t *cbk, int32_t error, gf_boolean_t ro)
+{
+ if ((error != 0) && (cbk->op_ret >= 0)) {
+ /* If cbk->op_errno was 0, it means that the fop succeeded and this
+ * error has happened while processing the answer. If the operation was
+ * read-only, there's no problem (i.e. we simply return the generated
+ * error code). However if it caused a modification, we must return EIO
+ * to indicate that the operation has been partially executed. */
+ cbk->op_errno = ro ? error : EIO;
+ cbk->op_ret = -1;
+
+ ec_fop_set_error(cbk->fop, cbk->op_errno);
+ }
+
+ return (cbk->op_ret < 0);
+}
+
+ec_cbk_data_t *
+ec_fop_prepare_answer(ec_fop_data_t *fop, gf_boolean_t ro)
+{
+ ec_cbk_data_t *cbk;
+ int32_t err;
+
+ cbk = fop->answer;
+ if (cbk == NULL) {
+ ec_fop_set_error(fop, EIO);
+
+ return NULL;
+ }
+
+ if (cbk->op_ret < 0) {
+ ec_fop_set_error(fop, cbk->op_errno);
+ }
+
+ err = ec_dict_combine(cbk, EC_COMBINE_XDATA);
+ if (ec_cbk_set_error(cbk, -err, ro)) {
+ return NULL;
+ }
+
+ return cbk;
+}
+
+void
+ec_sleep(ec_fop_data_t *fop)
{
LOCK(&fop->lock);
+ GF_ASSERT(fop->refs > 0);
fop->refs++;
fop->jobs++;
UNLOCK(&fop->lock);
}
-int32_t ec_check_complete(ec_fop_data_t * fop, ec_resume_f resume)
+int32_t
+ec_check_complete(ec_fop_data_t *fop, ec_resume_f resume)
{
int32_t error = -1;
@@ -250,14 +456,11 @@ int32_t ec_check_complete(ec_fop_data_t * fop, ec_resume_f resume)
GF_ASSERT(fop->resume == NULL);
- if (fop->jobs != 0)
- {
+ if (--fop->jobs != 0) {
ec_trace("WAIT", fop, "resume=%p", resume);
fop->resume = resume;
- }
- else
- {
+ } else {
error = fop->error;
fop->error = 0;
}
@@ -267,22 +470,8 @@ int32_t ec_check_complete(ec_fop_data_t * fop, ec_resume_f resume)
return error;
}
-void ec_wait_winds(ec_fop_data_t * fop)
-{
- LOCK(&fop->lock);
-
- if (fop->winds > 0)
- {
- fop->jobs++;
- fop->refs++;
-
- fop->flags |= EC_FLAG_WAITING_WINDS;
- }
-
- UNLOCK(&fop->lock);
-}
-
-void ec_resume(ec_fop_data_t * fop, int32_t error)
+void
+ec_resume(ec_fop_data_t *fop, int32_t error)
{
ec_resume_f resume = NULL;
@@ -290,16 +479,13 @@ void ec_resume(ec_fop_data_t * fop, int32_t error)
__ec_fop_set_error(fop, error);
- if (--fop->jobs == 0)
- {
+ if (--fop->jobs == 0) {
resume = fop->resume;
fop->resume = NULL;
- if (resume != NULL)
- {
+ if (resume != NULL) {
ec_trace("RESUME", fop, "error=%d", error);
- if (fop->error != 0)
- {
+ if (fop->error != 0) {
error = fop->error;
}
fop->error = 0;
@@ -308,30 +494,48 @@ void ec_resume(ec_fop_data_t * fop, int32_t error)
UNLOCK(&fop->lock);
- if (resume != NULL)
- {
+ if (resume != NULL) {
resume(fop, error);
}
ec_fop_data_release(fop);
}
-void ec_resume_parent(ec_fop_data_t * fop, int32_t error)
+void
+ec_resume_parent(ec_fop_data_t *fop)
{
- ec_fop_data_t * parent;
+ ec_fop_data_t *parent;
+ int32_t error = 0;
parent = fop->parent;
- if (parent != NULL)
- {
+ if (parent != NULL) {
+ if ((fop->fop_flags & EC_FOP_NO_PROPAGATE_ERROR) == 0) {
+ error = fop->error;
+ }
ec_trace("RESUME_PARENT", fop, "error=%u", error);
fop->parent = NULL;
ec_resume(parent, error);
}
}
-void ec_complete(ec_fop_data_t * fop)
+gf_boolean_t
+ec_is_recoverable_error(int32_t op_errno)
+{
+ switch (op_errno) {
+ case ENOTCONN:
+ case ESTALE:
+ case ENOENT:
+ case EBADFD: /*Opened fd but brick is disconnected*/
+ case EIO: /*Backend-fs crash like XFS/ext4 etc*/
+ return _gf_true;
+ }
+ return _gf_false;
+}
+
+void
+ec_complete(ec_fop_data_t *fop)
{
- ec_cbk_data_t * cbk = NULL;
+ ec_cbk_data_t *cbk = NULL;
int32_t resume = 0, update = 0;
int healing_count = 0;
@@ -343,83 +547,156 @@ void ec_complete(ec_fop_data_t * fop)
if (fop->answer == NULL) {
if (!list_empty(&fop->cbk_list)) {
cbk = list_entry(fop->cbk_list.next, ec_cbk_data_t, list);
- healing_count = ec_bits_count (cbk->mask & fop->healing);
+ healing_count = gf_bits_count(cbk->mask & fop->healing);
+ /* fop shouldn't be treated as success if it is not
+ * successful on at least fop->minimum good copies*/
if ((cbk->count - healing_count) >= fop->minimum) {
- /* fop shouldn't be treated as success if it is not
- * successful on at least fop->minimum good copies*/
- if ((cbk->op_ret >= 0) || (cbk->op_errno != ENOTCONN)) {
- fop->answer = cbk;
+ fop->answer = cbk;
- update = 1;
- }
+ update = 1;
}
}
resume = 1;
}
- else if ((fop->flags & EC_FLAG_WAITING_WINDS) != 0)
- {
- resume = 1;
- }
}
UNLOCK(&fop->lock);
- /* ec_update_bad() locks inode->lock. This may cause deadlocks with
- fop->lock when used in another order. Since ec_update_bad() will not
+ /* ec_update_good() locks inode->lock. This may cause deadlocks with
+ fop->lock when used in another order. Since ec_update_good() will not
be called more than once for each fop, it can be called from outside
the fop->lock locked region. */
if (update) {
- ec_update_bad(fop, cbk->mask);
+ ec_update_good(fop, cbk->mask);
}
- if (resume)
- {
+ if (resume) {
ec_resume(fop, 0);
}
ec_fop_data_release(fop);
}
-int32_t ec_child_select(ec_fop_data_t * fop)
+/* There could be already granted locks sitting on the bricks, unlock for which
+ * must be wound at all costs*/
+static gf_boolean_t
+ec_must_wind(ec_fop_data_t *fop)
{
- ec_t * ec = fop->xl->private;
- uintptr_t mask = 0;
- int32_t first = 0, num = 0;
+ if ((fop->id == GF_FOP_INODELK) || (fop->id == GF_FOP_FINODELK) ||
+ (fop->id == GF_FOP_LK)) {
+ if (fop->flock.l_type == F_UNLCK)
+ return _gf_true;
+ } else if ((fop->id == GF_FOP_ENTRYLK) || (fop->id == GF_FOP_FENTRYLK)) {
+ if (fop->entrylk_cmd == ENTRYLK_UNLOCK)
+ return _gf_true;
+ }
- fop->mask &= ec->node_mask;
+ return _gf_false;
+}
- mask = ec->xl_up;
- if (fop->parent == NULL)
- {
- if (fop->loc[0].inode != NULL) {
- mask &= ec_inode_good(fop->loc[0].inode, fop->xl);
- }
- if (fop->loc[1].inode != NULL) {
- mask &= ec_inode_good(fop->loc[1].inode, fop->xl);
+static gf_boolean_t
+ec_internal_op(ec_fop_data_t *fop)
+{
+ if (ec_must_wind(fop))
+ return _gf_true;
+ if (fop->id == GF_FOP_XATTROP)
+ return _gf_true;
+ if (fop->id == GF_FOP_FXATTROP)
+ return _gf_true;
+ if (fop->id == GF_FOP_OPEN)
+ return _gf_true;
+ return _gf_false;
+}
+
+char *
+ec_msg_str(ec_fop_data_t *fop)
+{
+ loc_t *loc1 = NULL;
+ loc_t *loc2 = NULL;
+ char gfid1[64] = {0};
+ char gfid2[64] = {0};
+ ec_fop_data_t *parent = fop->parent;
+
+ if (fop->errstr)
+ return fop->errstr;
+ if (!fop->use_fd) {
+ loc1 = &fop->loc[0];
+ loc2 = &fop->loc[1];
+
+ if (fop->id == GF_FOP_RENAME) {
+ gf_asprintf(&fop->errstr,
+ "FOP : '%s' failed on '%s' and '%s' with gfids "
+ "%s and %s respectively. Parent FOP: %s",
+ ec_fop_name(fop->id), loc1->path, loc2->path,
+ uuid_utoa_r(loc1->gfid, gfid1),
+ uuid_utoa_r(loc2->gfid, gfid2),
+ parent ? ec_fop_name(parent->id) : "No Parent");
+ } else {
+ gf_asprintf(
+ &fop->errstr,
+ "FOP : '%s' failed on '%s' with gfid %s. Parent FOP: %s",
+ ec_fop_name(fop->id), loc1->path,
+ uuid_utoa_r(loc1->gfid, gfid1),
+ parent ? ec_fop_name(parent->id) : "No Parent");
}
- if (fop->fd != NULL) {
- if (fop->fd->inode != NULL) {
- mask &= ec_inode_good(fop->fd->inode, fop->xl);
- }
- mask &= ec_fd_good(fop->fd, fop->xl);
+ } else {
+ gf_asprintf(
+ &fop->errstr, "FOP : '%s' failed on gfid %s. Parent FOP: %s",
+ ec_fop_name(fop->id), uuid_utoa_r(fop->fd->inode->gfid, gfid1),
+ parent ? ec_fop_name(parent->id) : "No Parent");
+ }
+ return fop->errstr;
+}
+
+static void
+ec_log_insufficient_vol(ec_fop_data_t *fop, int32_t have, uint32_t need,
+ int32_t loglevel)
+{
+ ec_t *ec = fop->xl->private;
+ char str1[32], str2[32], str3[32];
+
+ gf_msg(ec->xl->name, loglevel, 0, EC_MSG_CHILDS_INSUFFICIENT,
+ "Insufficient available children for this request: "
+ "Have : %d, Need : %u : Child UP : %s "
+ "Mask: %s, Healing : %s : %s ",
+ have, need, ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
+ ec_bin(str2, sizeof(str2), fop->mask, ec->nodes),
+ ec_bin(str3, sizeof(str3), fop->healing, ec->nodes),
+ ec_msg_str(fop));
+}
+
+static int32_t
+ec_child_select(ec_fop_data_t *fop)
+{
+ ec_t *ec = fop->xl->private;
+ int32_t first = 0, num = 0;
+
+ ec_fop_cleanup(fop);
+
+ fop->mask &= ec->node_mask;
+ /* Wind the fop on same subvols as parent for any internal extra fops like
+ * head/tail read in case of writev fop. Unlocks shouldn't do this because
+ * unlock should go on all subvols where lock is performed*/
+ if (fop->parent && !ec_internal_op(fop)) {
+ fop->mask &= (fop->parent->mask & ~fop->parent->healing);
+ if (ec_is_data_fop(fop->id)) {
+ fop->healing |= fop->parent->healing;
}
}
- if ((fop->mask & ~mask) != 0)
- {
- gf_log(fop->xl->name, GF_LOG_WARNING, "Executing operation with "
- "some subvolumes unavailable "
- "(%lX)", fop->mask & ~mask);
- fop->mask &= mask;
+ if ((fop->mask & ~ec->xl_up) != 0) {
+ gf_msg(fop->xl->name, GF_LOG_WARNING, 0, EC_MSG_OP_EXEC_UNAVAIL,
+ "Executing operation with "
+ "some subvolumes unavailable. (%" PRIXPTR "). %s ",
+ fop->mask & ~ec->xl_up, ec_msg_str(fop));
+ fop->mask &= ec->xl_up;
}
- switch (fop->minimum)
- {
+ switch (fop->minimum) {
case EC_MINIMUM_ALL:
- fop->minimum = ec_bits_count(fop->mask);
- if (fop->minimum >= ec->fragments)
- {
+ fop->minimum = gf_bits_count(fop->mask);
+ if (fop->minimum >= ec->fragments) {
break;
}
case EC_MINIMUM_MIN:
@@ -429,43 +706,51 @@ int32_t ec_child_select(ec_fop_data_t * fop)
fop->minimum = 1;
}
- first = ec->idx;
- if (++first >= ec->nodes)
- {
- first = 0;
+ if (ec->read_policy == EC_ROUND_ROBIN) {
+ first = ec->idx;
+ if (++first >= ec->nodes) {
+ first = 0;
+ }
+ ec->idx = first;
}
- ec->idx = first;
+ num = gf_bits_count(fop->mask);
/*Unconditionally wind on healing subvolumes*/
fop->mask |= fop->healing;
fop->remaining = fop->mask;
+ fop->received = 0;
ec_trace("SELECT", fop, "");
- num = ec_bits_count(fop->mask);
- if ((num < fop->minimum) && (num < ec->fragments))
- {
- gf_log(ec->xl->name, GF_LOG_ERROR, "Insufficient available childs "
- "for this request (have %d, need "
- "%d)", num, fop->minimum);
-
+ if ((num < fop->minimum) && (num < ec->fragments)) {
+ ec_log_insufficient_vol(fop, num, fop->minimum, GF_LOG_ERROR);
return 0;
}
- ec_sleep(fop);
+ if (!fop->parent && fop->lock_count &&
+ (fop->locks[0].update[EC_DATA_TXN] ||
+ fop->locks[0].update[EC_METADATA_TXN])) {
+ if (ec->quorum_count && (num < ec->quorum_count)) {
+ ec_log_insufficient_vol(fop, num, ec->quorum_count, GF_LOG_ERROR);
+ return 0;
+ }
+ }
return 1;
}
-int32_t ec_dispatch_next(ec_fop_data_t * fop, int32_t idx)
+void
+ec_dispatch_next(ec_fop_data_t *fop, uint32_t idx)
{
- ec_t * ec = fop->xl->private;
+ uint32_t i = EC_INVALID_INDEX;
+ ec_t *ec = fop->xl->private;
LOCK(&fop->lock);
- idx = ec_child_next(ec, fop, idx);
- if (idx >= 0)
- {
+ i = ec_child_next(ec, fop, idx);
+ if (i < EC_MAX_NODES) {
+ idx = i;
+
fop->remaining ^= 1ULL << idx;
ec_trace("EXECUTE", fop, "idx=%d", idx);
@@ -476,20 +761,18 @@ int32_t ec_dispatch_next(ec_fop_data_t * fop, int32_t idx)
UNLOCK(&fop->lock);
- if (idx >= 0)
- {
+ if (i < EC_MAX_NODES) {
fop->wind(ec, fop, idx);
}
-
- return idx;
}
-void ec_dispatch_mask(ec_fop_data_t * fop, uintptr_t mask)
+void
+ec_dispatch_mask(ec_fop_data_t *fop, uintptr_t mask)
{
- ec_t * ec = fop->xl->private;
+ ec_t *ec = fop->xl->private;
int32_t count, idx;
- count = ec_bits_count(mask);
+ count = gf_bits_count(mask);
LOCK(&fop->lock);
@@ -503,10 +786,8 @@ void ec_dispatch_mask(ec_fop_data_t * fop, uintptr_t mask)
UNLOCK(&fop->lock);
idx = 0;
- while (mask != 0)
- {
- if ((mask & 1) != 0)
- {
+ while (mask != 0) {
+ if ((mask & 1) != 0) {
fop->wind(ec, fop, idx);
}
idx++;
@@ -514,53 +795,64 @@ void ec_dispatch_mask(ec_fop_data_t * fop, uintptr_t mask)
}
}
-void ec_dispatch_start(ec_fop_data_t * fop)
+void
+ec_dispatch_start(ec_fop_data_t *fop)
{
fop->answer = NULL;
fop->good = 0;
- fop->bad = 0;
INIT_LIST_HEAD(&fop->cbk_list);
- if (fop->lock_count > 0)
- {
+ if (fop->lock_count > 0) {
ec_owner_copy(fop->frame, &fop->req_frame->root->lk_owner);
}
}
-void ec_dispatch_one(ec_fop_data_t * fop)
+void
+ec_dispatch_one(ec_fop_data_t *fop)
{
- ec_t * ec = fop->xl->private;
-
ec_dispatch_start(fop);
- if (ec_child_select(fop))
- {
+ if (ec_child_select(fop)) {
+ ec_sleep(fop);
+
fop->expected = 1;
- fop->first = ec->idx;
+ fop->first = ec_select_first_by_read_policy(fop->xl->private, fop);
ec_dispatch_next(fop, fop->first);
}
}
-int32_t ec_dispatch_one_retry(ec_fop_data_t * fop, int32_t idx, int32_t op_ret,
- int32_t op_errno)
+gf_boolean_t
+ec_dispatch_one_retry(ec_fop_data_t *fop, ec_cbk_data_t **cbk)
{
- if ((op_ret < 0) && (op_errno == ENOTCONN))
- {
- return (ec_dispatch_next(fop, idx) >= 0);
+ ec_cbk_data_t *tmp;
+
+ tmp = ec_fop_prepare_answer(fop, _gf_true);
+ if (cbk != NULL) {
+ *cbk = tmp;
+ }
+ if ((tmp != NULL) && (tmp->op_ret < 0) &&
+ ec_is_recoverable_error(tmp->op_errno)) {
+ GF_ASSERT(fop->mask & (1ULL << tmp->idx));
+ fop->mask ^= (1ULL << tmp->idx);
+ if (fop->mask) {
+ return _gf_true;
+ }
}
- return 0;
+ return _gf_false;
}
-void ec_dispatch_inc(ec_fop_data_t * fop)
+void
+ec_dispatch_inc(ec_fop_data_t *fop)
{
ec_dispatch_start(fop);
- if (ec_child_select(fop))
- {
- fop->expected = ec_bits_count(fop->remaining);
+ if (ec_child_select(fop)) {
+ ec_sleep(fop);
+
+ fop->expected = gf_bits_count(fop->remaining);
fop->first = 0;
ec_dispatch_next(fop, 0);
@@ -568,989 +860,1977 @@ void ec_dispatch_inc(ec_fop_data_t * fop)
}
void
-ec_dispatch_all (ec_fop_data_t *fop)
+ec_dispatch_all(ec_fop_data_t *fop)
{
- ec_dispatch_start(fop);
+ ec_dispatch_start(fop);
- if (ec_child_select(fop)) {
- fop->expected = ec_bits_count(fop->remaining);
- fop->first = 0;
+ if (ec_child_select(fop)) {
+ ec_sleep(fop);
- ec_dispatch_mask(fop, fop->remaining);
- }
+ fop->expected = gf_bits_count(fop->remaining);
+ fop->first = 0;
+
+ ec_dispatch_mask(fop, fop->remaining);
+ }
}
-void ec_dispatch_min(ec_fop_data_t * fop)
+void
+ec_dispatch_min(ec_fop_data_t *fop)
{
- ec_t * ec = fop->xl->private;
+ ec_t *ec = fop->xl->private;
uintptr_t mask;
- int32_t idx, count;
+ uint32_t idx;
+ int32_t count;
ec_dispatch_start(fop);
- if (ec_child_select(fop))
- {
+ if (ec_child_select(fop)) {
+ ec_sleep(fop);
+
fop->expected = count = ec->fragments;
- fop->first = ec->idx;
+ fop->first = ec_select_first_by_read_policy(fop->xl->private, fop);
idx = fop->first - 1;
mask = 0;
- while (count-- > 0)
- {
+ while (count-- > 0) {
idx = ec_child_next(ec, fop, idx + 1);
- mask |= 1ULL << idx;
+ if (idx < EC_MAX_NODES)
+ mask |= 1ULL << idx;
}
ec_dispatch_mask(fop, mask);
}
}
-ec_lock_t * ec_lock_allocate(xlator_t * xl, int32_t kind, loc_t * loc)
+void
+ec_succeed_all(ec_fop_data_t *fop)
{
- ec_t * ec = xl->private;
- ec_lock_t * lock;
+ ec_dispatch_start(fop);
+
+ if (ec_child_select(fop)) {
+ fop->expected = gf_bits_count(fop->remaining);
+ fop->first = 0;
+
+ /* Simulate a successful execution on all bricks */
+ ec_trace("SUCCEED", fop, "");
+
+ fop->good = fop->remaining;
+ fop->remaining = 0;
+ }
+}
+
+ec_lock_t *
+ec_lock_allocate(ec_fop_data_t *fop, loc_t *loc)
+{
+ ec_t *ec = fop->xl->private;
+ ec_lock_t *lock;
+ int32_t err;
if ((loc->inode == NULL) ||
- (gf_uuid_is_null(loc->gfid) && gf_uuid_is_null(loc->inode->gfid)))
- {
- gf_log(xl->name, GF_LOG_ERROR, "Trying to lock based on an invalid "
- "inode");
+ (gf_uuid_is_null(loc->gfid) && gf_uuid_is_null(loc->inode->gfid))) {
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_INVALID_INODE,
+ "Trying to lock based on an invalid "
+ "inode");
+
+ __ec_fop_set_error(fop, EINVAL);
return NULL;
}
lock = mem_get0(ec->lock_pool);
- if (lock != NULL)
- {
- lock->kind = kind;
- lock->good_mask = -1ULL;
+ if (lock != NULL) {
+ lock->good_mask = UINTPTR_MAX;
+ INIT_LIST_HEAD(&lock->owners);
INIT_LIST_HEAD(&lock->waiting);
- if (ec_loc_from_loc(xl, &lock->loc, loc) != 0)
- {
+ INIT_LIST_HEAD(&lock->frozen);
+ err = ec_loc_from_loc(fop->xl, &lock->loc, loc);
+ if (err != 0) {
mem_put(lock);
lock = NULL;
+
+ __ec_fop_set_error(fop, -err);
}
}
return lock;
}
-void ec_lock_destroy(ec_lock_t * lock)
+void
+ec_lock_destroy(ec_lock_t *lock)
{
loc_wipe(&lock->loc);
+ if (lock->fd != NULL) {
+ fd_unref(lock->fd);
+ }
mem_put(lock);
}
-int32_t ec_lock_compare(ec_lock_t * lock1, ec_lock_t * lock2)
+int32_t
+ec_lock_compare(ec_lock_t *lock1, ec_lock_t *lock2)
{
return gf_uuid_compare(lock1->loc.gfid, lock2->loc.gfid);
}
-ec_lock_link_t *ec_lock_insert(ec_fop_data_t *fop, ec_lock_t *lock,
- int32_t update)
+static void
+ec_lock_insert(ec_fop_data_t *fop, ec_lock_t *lock, uint32_t flags, loc_t *base,
+ off_t fl_start, uint64_t fl_size)
{
- ec_lock_t *new_lock, *tmp;
- ec_lock_link_t *link = NULL;
- int32_t tmp_update;
+ ec_lock_link_t *link;
- new_lock = lock;
+ /* This check is only prepared for up to 2 locks per fop. If more locks
+ * are needed this must be changed. */
if ((fop->lock_count > 0) &&
- (ec_lock_compare(fop->locks[0].lock, new_lock) > 0))
- {
- tmp = fop->locks[0].lock;
- fop->locks[0].lock = new_lock;
- new_lock = tmp;
-
- tmp_update = fop->locks_update;
- fop->locks_update = update;
- update = tmp_update;
+ (ec_lock_compare(fop->locks[0].lock, lock) < 0)) {
+ fop->first_lock = fop->lock_count;
+ } else {
+ /* When the first lock is added to the current fop, request lock
+ * counts from locks xlator to be able to determine if there is
+ * contention and release the lock sooner. */
+ if (fop->xdata == NULL) {
+ fop->xdata = dict_new();
+ if (fop->xdata == NULL) {
+ ec_fop_set_error(fop, ENOMEM);
+ return;
+ }
+ }
+ if (dict_set_str(fop->xdata, GLUSTERFS_INODELK_DOM_COUNT,
+ fop->xl->name) != 0) {
+ ec_fop_set_error(fop, ENOMEM);
+ return;
+ }
}
- fop->locks[fop->lock_count].lock = new_lock;
- fop->locks[fop->lock_count].fop = fop;
- fop->locks_update |= update << fop->lock_count;
+ link = &fop->locks[fop->lock_count++];
- fop->lock_count++;
+ link->lock = lock;
+ link->fop = fop;
+ link->update[EC_DATA_TXN] = (flags & EC_UPDATE_DATA) != 0;
+ link->update[EC_METADATA_TXN] = (flags & EC_UPDATE_META) != 0;
+ link->base = base;
+ link->fl_start = fl_start;
+ link->fl_end = ec_range_end_get(fl_start, fl_size);
- if (lock->timer != NULL) {
- link = lock->timer->data;
- ec_trace("UNLOCK_CANCELLED", link->fop, "lock=%p", lock);
- gf_timer_call_cancel(fop->xl->ctx, lock->timer);
- lock->timer = NULL;
- } else {
- lock->refs++;
- }
-
- return link;
+ lock->refs_pending++;
}
-void ec_lock_prepare_entry(ec_fop_data_t *fop, loc_t *loc, int32_t update)
+static void
+ec_lock_prepare_inode_internal(ec_fop_data_t *fop, loc_t *loc, uint32_t flags,
+ loc_t *base, off_t fl_start, uint64_t fl_size)
{
- ec_lock_t * lock = NULL;
- ec_inode_t * ctx = NULL;
- ec_lock_link_t *link = NULL;
- loc_t tmp;
+ ec_lock_t *lock = NULL;
+ ec_inode_t *ctx;
- if ((fop->parent != NULL) || (fop->error != 0))
- {
+ if ((fop->parent != NULL) || (fop->error != 0) || (loc->inode == NULL)) {
return;
}
- /* update is only 0 for 'opendir', which needs to lock the entry pointed
- * by loc instead of its parent.
- */
- if (update)
- {
- if (ec_loc_parent(fop->xl, loc, &tmp) != 0) {
- ec_fop_set_error(fop, EIO);
+ LOCK(&loc->inode->lock);
- return;
- }
+ ctx = __ec_inode_get(loc->inode, fop->xl);
+ if (ctx == NULL) {
+ __ec_fop_set_error(fop, ENOMEM);
+
+ goto unlock;
+ }
+
+ if (ctx->inode_lock != NULL) {
+ lock = ctx->inode_lock;
/* If there's another lock, make sure that it's not the same. Otherwise
* do not insert it.
*
* This can only happen on renames where source and target names are
* in the same directory. */
- if ((fop->lock_count > 0) &&
- (fop->locks[0].lock->loc.inode == tmp.inode)) {
- goto wipe;
- }
- } else {
- if (ec_loc_from_loc(fop->xl, &tmp, loc) != 0) {
- ec_fop_set_error(fop, EIO);
+ if ((fop->lock_count > 0) && (fop->locks[0].lock == lock)) {
+ /* Combine data/meta updates */
+ fop->locks[0].update[EC_DATA_TXN] |= (flags & EC_UPDATE_DATA) != 0;
+ fop->locks[0].update[EC_METADATA_TXN] |= (flags & EC_UPDATE_META) !=
+ 0;
+
+ /* Only one base inode is allowed per fop, so there shouldn't be
+ * overwrites here. */
+ if (base != NULL) {
+ fop->locks[0].base = base;
+ }
- return;
+ goto update_query;
}
- }
-
- LOCK(&tmp.inode->lock);
-
- ctx = __ec_inode_get(tmp.inode, fop->xl);
- if (ctx == NULL)
- {
- __ec_fop_set_error(fop, EIO);
- goto unlock;
- }
-
- if (ctx->entry_lock != NULL)
- {
- lock = ctx->entry_lock;
- ec_trace("LOCK_ENTRYLK", fop, "lock=%p, inode=%p, path=%s"
- "Lock already acquired",
- lock, tmp.inode, tmp.path);
+ ec_trace("LOCK_INODELK", fop,
+ "lock=%p, inode=%p. Lock already "
+ "acquired",
+ lock, loc->inode);
goto insert;
}
- lock = ec_lock_allocate(fop->xl, EC_LOCK_ENTRY, &tmp);
- if (lock == NULL)
- {
- __ec_fop_set_error(fop, EIO);
-
+ lock = ec_lock_allocate(fop, loc);
+ if (lock == NULL) {
goto unlock;
}
ec_trace("LOCK_CREATE", fop, "lock=%p", lock);
- lock->type = ENTRYLK_WRLCK;
+ lock->flock.l_type = F_WRLCK;
+ lock->flock.l_whence = SEEK_SET;
- lock->plock = &ctx->entry_lock;
- ctx->entry_lock = lock;
+ lock->ctx = ctx;
+ ctx->inode_lock = lock;
insert:
- link = ec_lock_insert(fop, lock, update);
-
+ ec_lock_insert(fop, lock, flags, base, fl_start, fl_size);
+update_query:
+ lock->query |= (flags & EC_QUERY_INFO) != 0;
unlock:
- UNLOCK(&tmp.inode->lock);
-
-wipe:
- loc_wipe(&tmp);
+ UNLOCK(&loc->inode->lock);
+}
- if (link != NULL) {
- ec_resume(link->fop, 0);
- }
+void
+ec_lock_prepare_inode(ec_fop_data_t *fop, loc_t *loc, uint32_t flags,
+ off_t fl_start, uint64_t fl_size)
+{
+ ec_lock_prepare_inode_internal(fop, loc, flags, NULL, fl_start, fl_size);
}
-void ec_lock_prepare_inode(ec_fop_data_t *fop, loc_t *loc, int32_t update)
+void
+ec_lock_prepare_parent_inode(ec_fop_data_t *fop, loc_t *loc, loc_t *base,
+ uint32_t flags)
{
- ec_lock_link_t *link = NULL;
- ec_lock_t * lock;
- ec_inode_t * ctx;
+ loc_t tmp;
+ int32_t err;
- if ((fop->parent != NULL) || (fop->error != 0) || (loc->inode == NULL))
- {
+ if (fop->error != 0) {
return;
}
- LOCK(&loc->inode->lock);
+ err = ec_loc_parent(fop->xl, loc, &tmp);
+ if (err != 0) {
+ ec_fop_set_error(fop, -err);
- ctx = __ec_inode_get(loc->inode, fop->xl);
- if (ctx == NULL)
- {
- __ec_fop_set_error(fop, EIO);
+ return;
+ }
- goto unlock;
+ if ((flags & EC_INODE_SIZE) != 0) {
+ flags ^= EC_INODE_SIZE;
+ } else {
+ base = NULL;
}
- if (ctx->inode_lock != NULL)
- {
- lock = ctx->inode_lock;
- ec_trace("LOCK_INODELK", fop, "lock=%p, inode=%p. Lock already "
- "acquired", lock, loc->inode);
+ ec_lock_prepare_inode_internal(fop, &tmp, flags, base, 0, EC_RANGE_FULL);
- goto insert;
+ loc_wipe(&tmp);
+}
+
+void
+ec_lock_prepare_fd(ec_fop_data_t *fop, fd_t *fd, uint32_t flags, off_t fl_start,
+ uint64_t fl_size)
+{
+ loc_t loc;
+ int32_t err;
+
+ if (fop->error != 0) {
+ return;
}
- lock = ec_lock_allocate(fop->xl, EC_LOCK_INODE, loc);
- if (lock == NULL)
- {
- __ec_fop_set_error(fop, EIO);
+ err = ec_loc_from_fd(fop->xl, &loc, fd);
+ if (err != 0) {
+ ec_fop_set_error(fop, -err);
- goto unlock;
+ return;
}
- ec_trace("LOCK_CREATE", fop, "lock=%p", lock);
+ ec_lock_prepare_inode_internal(fop, &loc, flags, NULL, fl_start, fl_size);
- lock->flock.l_type = F_WRLCK;
- lock->flock.l_whence = SEEK_SET;
+ loc_wipe(&loc);
+}
- lock->plock = &ctx->inode_lock;
- ctx->inode_lock = lock;
+gf_boolean_t
+ec_config_check(xlator_t *xl, ec_config_t *config)
+{
+ ec_t *ec;
-insert:
- link = ec_lock_insert(fop, lock, update);
+ ec = xl->private;
+ if ((config->version != EC_CONFIG_VERSION) ||
+ (config->algorithm != EC_CONFIG_ALGORITHM) ||
+ (config->gf_word_size != EC_GF_BITS) || (config->bricks != ec->nodes) ||
+ (config->redundancy != ec->redundancy) ||
+ (config->chunk_size != EC_METHOD_CHUNK_SIZE)) {
+ uint32_t data_bricks;
-unlock:
- UNLOCK(&loc->inode->lock);
+ /* This combination of version/algorithm requires the following
+ values. Incorrect values for these fields are a sign of
+ corruption:
- if (link != NULL) {
- ec_resume(link->fop, 0);
+ redundancy > 0
+ redundancy * 2 < bricks
+ gf_word_size must be a power of 2
+ chunk_size (in bits) must be a multiple of gf_word_size *
+ (bricks - redundancy) */
+
+ data_bricks = config->bricks - config->redundancy;
+ if ((config->redundancy < 1) ||
+ (config->redundancy * 2 >= config->bricks) ||
+ !ec_is_power_of_2(config->gf_word_size) ||
+ ((config->chunk_size * 8) % (config->gf_word_size * data_bricks) !=
+ 0)) {
+ gf_msg(xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_INVALID_CONFIG,
+ "Invalid or corrupted config");
+ } else {
+ gf_msg(xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_INVALID_CONFIG,
+ "Unsupported config "
+ "(V=%u, A=%u, W=%u, "
+ "N=%u, R=%u, S=%u)",
+ config->version, config->algorithm, config->gf_word_size,
+ config->bricks, config->redundancy, config->chunk_size);
+ }
+
+ return _gf_false;
}
+
+ return _gf_true;
}
-void ec_lock_prepare_fd(ec_fop_data_t *fop, fd_t *fd, int32_t update)
+gf_boolean_t
+ec_set_dirty_flag(ec_lock_link_t *link, ec_inode_t *ctx, uint64_t *dirty)
{
- loc_t loc;
+ gf_boolean_t set_dirty = _gf_false;
- if ((fop->parent != NULL) || (fop->error != 0))
- {
- return;
+ if (link->update[EC_DATA_TXN] && !ctx->dirty[EC_DATA_TXN]) {
+ if (!link->optimistic_changelog)
+ dirty[EC_DATA_TXN] = 1;
}
- if (ec_loc_from_fd(fop->xl, &loc, fd) == 0)
- {
- ec_lock_prepare_inode(fop, &loc, update);
-
- loc_wipe(&loc);
+ if (link->update[EC_METADATA_TXN] && !ctx->dirty[EC_METADATA_TXN]) {
+ if (!link->optimistic_changelog)
+ dirty[EC_METADATA_TXN] = 1;
}
- else
- {
- ec_fop_set_error(fop, EIO);
+
+ if (dirty[EC_METADATA_TXN] || dirty[EC_DATA_TXN]) {
+ set_dirty = _gf_true;
}
+
+ return set_dirty;
}
-int32_t ec_locked(call_frame_t * frame, void * cookie, xlator_t * this,
- int32_t op_ret, int32_t op_errno, dict_t * xdata)
+int32_t
+ec_prepare_update_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
- ec_fop_data_t * fop = cookie;
- ec_lock_t * lock = NULL;
-
- if (op_ret >= 0)
- {
- lock = fop->data;
- lock->mask = fop->good;
- lock->acquired = 1;
+ struct list_head list;
+ ec_fop_data_t *fop = cookie, *parent, *tmp;
+ ec_lock_link_t *parent_link = fop->data;
+ ec_lock_link_t *link = NULL;
+ ec_lock_t *lock = NULL;
+ ec_inode_t *ctx;
+ gf_boolean_t release = _gf_false;
+ uint64_t provided_flags = 0;
+ uint64_t dirty[EC_VERSION_SIZE] = {0, 0};
+ lock = parent_link->lock;
+ parent = parent_link->fop;
+ ctx = lock->ctx;
- fop->parent->mask &= fop->good;
- fop->parent->locked++;
+ INIT_LIST_HEAD(&list);
+ provided_flags = EC_PROVIDED_FLAGS(parent_link->waiting_flags);
- ec_trace("LOCKED", fop->parent, "lock=%p", lock);
+ LOCK(&lock->loc.inode->lock);
- ec_lock(fop->parent);
- }
- else
+ list_for_each_entry(link, &lock->owners, owner_list)
{
- gf_log(this->name, GF_LOG_WARNING, "Failed to complete preop lock");
+ if ((link->waiting_flags & provided_flags) != 0) {
+ link->waiting_flags ^= (link->waiting_flags & provided_flags);
+ if (EC_NEEDED_FLAGS(link->waiting_flags) == 0)
+ list_add_tail(&link->fop->cbk_list, &list);
+ }
}
+ if (op_ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, EC_MSG_SIZE_VERS_GET_FAIL,
+ "Failed to get size and version : %s", ec_msg_str(fop));
- return 0;
-}
+ goto unlock;
+ }
-void ec_lock(ec_fop_data_t * fop)
-{
- ec_lock_t * lock;
+ if (EC_FLAGS_HAVE(provided_flags, EC_FLAG_XATTROP)) {
+ op_errno = -ec_dict_del_array(dict, EC_XATTR_VERSION, ctx->pre_version,
+ EC_VERSION_SIZE);
+ if (op_errno != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ EC_MSG_VER_XATTR_GET_FAIL, "Unable to get version xattr. %s",
+ ec_msg_str(fop));
+ goto unlock;
+ }
+ ctx->post_version[0] += ctx->pre_version[0];
+ ctx->post_version[1] += ctx->pre_version[1];
+
+ ctx->have_version = _gf_true;
+
+ if (lock->loc.inode->ia_type == IA_IFREG ||
+ lock->loc.inode->ia_type == IA_INVAL) {
+ op_errno = -ec_dict_del_number(dict, EC_XATTR_SIZE, &ctx->pre_size);
+ if (op_errno != 0) {
+ if (lock->loc.inode->ia_type == IA_IFREG) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ EC_MSG_SIZE_XATTR_GET_FAIL,
+ "Unable to get size xattr. %s", ec_msg_str(fop));
+ goto unlock;
+ }
+ } else {
+ ctx->post_size = ctx->pre_size;
- while (fop->locked < fop->lock_count)
- {
- lock = fop->locks[fop->locked].lock;
+ ctx->have_size = _gf_true;
+ }
- LOCK(&lock->loc.inode->lock);
+ op_errno = -ec_dict_del_config(dict, EC_XATTR_CONFIG, &ctx->config);
+ if (op_errno != 0) {
+ if ((lock->loc.inode->ia_type == IA_IFREG) ||
+ (op_errno != ENODATA)) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ EC_MSG_CONFIG_XATTR_GET_FAIL,
+ "Unable to get config xattr. %s", ec_msg_str(fop));
- if (lock->owner != NULL)
- {
- ec_trace("LOCK_WAIT", fop, "lock=%p", lock);
+ goto unlock;
+ }
+ } else {
+ if (!ec_config_check(parent->xl, &ctx->config)) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL,
+ EC_MSG_CONFIG_XATTR_INVALID, "Invalid config xattr");
- list_add_tail(&fop->locks[fop->locked].wait_list, &lock->waiting);
+ op_errno = EINVAL;
- ec_sleep(fop);
+ goto unlock;
+ }
+ ctx->have_config = _gf_true;
+ }
+ }
+ ctx->have_info = _gf_true;
+ }
- UNLOCK(&lock->loc.inode->lock);
+ ec_set_dirty_flag(fop->data, ctx, dirty);
+ if (dirty[EC_METADATA_TXN] &&
+ (EC_FLAGS_HAVE(provided_flags, EC_FLAG_METADATA_DIRTY))) {
+ GF_ASSERT(!ctx->dirty[EC_METADATA_TXN]);
+ ctx->dirty[EC_METADATA_TXN] = 1;
+ }
- break;
+ if (dirty[EC_DATA_TXN] &&
+ (EC_FLAGS_HAVE(provided_flags, EC_FLAG_DATA_DIRTY))) {
+ GF_ASSERT(!ctx->dirty[EC_DATA_TXN]);
+ ctx->dirty[EC_DATA_TXN] = 1;
+ }
+ op_errno = 0;
+unlock:
+
+ lock->waiting_flags ^= provided_flags;
+
+ if (op_errno == 0) {
+ /* If the fop fails on any of the good bricks, it is important to mark
+ * it dirty and update versions right away if dirty was not set before.
+ */
+ if (lock->good_mask & ~(fop->good | fop->remaining)) {
+ release = _gf_true;
}
- lock->owner = fop;
- UNLOCK(&lock->loc.inode->lock);
+ if (parent_link->update[0] && !parent_link->dirty[0]) {
+ lock->release |= release;
+ }
- if (!lock->acquired)
- {
- ec_owner_set(fop->frame, lock);
+ if (parent_link->update[1] && !parent_link->dirty[1]) {
+ lock->release |= release;
+ }
- if (lock->kind == EC_LOCK_ENTRY)
- {
- ec_trace("LOCK_ACQUIRE", fop, "lock=%p, inode=%p, path=%s",
- lock, lock->loc.inode, lock->loc.path);
+ /* We don't allow the main fop to be executed on bricks that have not
+ * succeeded the initial xattrop. */
+ ec_lock_update_good(lock, fop);
- ec_entrylk(fop->frame, fop->xl, -1, EC_MINIMUM_ALL, ec_locked,
- lock, fop->xl->name, &lock->loc, NULL,
- ENTRYLK_LOCK, lock->type, NULL);
- }
- else
- {
- ec_trace("LOCK_ACQUIRE", fop, "lock=%p, inode=%p", lock,
- lock->loc.inode);
+ /*As of now only data healing marks bricks as healing*/
+ lock->healing |= fop->healing;
+ }
- ec_inodelk(fop->frame, fop->xl, -1, EC_MINIMUM_ALL, ec_locked,
- lock, fop->xl->name, &lock->loc, F_SETLKW,
- &lock->flock, NULL);
- }
+ UNLOCK(&lock->loc.inode->lock);
- break;
- }
+ while (!list_empty(&list)) {
+ tmp = list_entry(list.next, ec_fop_data_t, cbk_list);
+ list_del_init(&tmp->cbk_list);
- ec_trace("LOCK_REUSE", fop, "lock=%p", lock);
+ if (op_errno == 0) {
+ tmp->mask &= fop->good;
- if (lock->have_size)
- {
- fop->pre_size = fop->post_size = lock->size;
- fop->have_size = 1;
+ /*As of now only data healing marks bricks as healing*/
+ if (ec_is_data_fop(tmp->id)) {
+ tmp->healing |= fop->healing;
+ }
}
- fop->mask &= lock->good_mask;
- fop->locked++;
+ ec_resume(tmp, op_errno);
}
+
+ return 0;
}
-gf_boolean_t
-ec_config_check (ec_fop_data_t *fop, dict_t *xdata)
+static gf_boolean_t
+ec_set_needed_flag(ec_lock_t *lock, ec_lock_link_t *link, uint64_t flag)
{
- ec_t *ec;
+ uint64_t current;
- if (ec_dict_del_config(xdata, EC_XATTR_CONFIG, &fop->config) < 0) {
- gf_log(fop->xl->name, GF_LOG_ERROR, "Failed to get a valid "
- "config");
+ link->waiting_flags |= EC_FLAG_NEEDS(flag);
- ec_fop_set_error(fop, EIO);
+ current = EC_NEEDED_FLAGS(lock->waiting_flags);
+ if (!EC_FLAGS_HAVE(current, flag)) {
+ lock->waiting_flags |= EC_FLAG_NEEDS(flag);
+ link->waiting_flags |= EC_FLAG_PROVIDES(flag);
- return _gf_false;
+ return _gf_true;
}
- ec = fop->xl->private;
- if ((fop->config.version != EC_CONFIG_VERSION) ||
- (fop->config.algorithm != EC_CONFIG_ALGORITHM) ||
- (fop->config.gf_word_size != EC_GF_BITS) ||
- (fop->config.bricks != ec->nodes) ||
- (fop->config.redundancy != ec->redundancy) ||
- (fop->config.chunk_size != EC_METHOD_CHUNK_SIZE)) {
- uint32_t data_bricks;
+ return _gf_false;
+}
- /* This combination of version/algorithm requires the following
- values. Incorrect values for these fields are a sign of
- corruption:
+static uint64_t
+ec_set_xattrop_flags_and_params(ec_lock_t *lock, ec_lock_link_t *link,
+ uint64_t *dirty)
+{
+ uint64_t oldflags = 0;
+ uint64_t newflags = 0;
+ ec_inode_t *ctx = lock->ctx;
- redundancy > 0
- redundancy * 2 < bricks
- gf_word_size must be a power of 2
- chunk_size (in bits) must be a multiple of gf_word_size *
- (bricks - redundancy) */
+ oldflags = EC_NEEDED_FLAGS(lock->waiting_flags);
- data_bricks = fop->config.bricks - fop->config.redundancy;
- if ((fop->config.redundancy < 1) ||
- (fop->config.redundancy * 2 >= fop->config.bricks) ||
- !ec_is_power_of_2(fop->config.gf_word_size) ||
- ((fop->config.chunk_size * 8) % (fop->config.gf_word_size *
- data_bricks) != 0)) {
- gf_log(fop->xl->name, GF_LOG_ERROR, "Invalid or corrupted config");
- } else {
- gf_log(fop->xl->name, GF_LOG_ERROR, "Unsupported config "
- "(V=%u, A=%u, W=%u, "
- "N=%u, R=%u, S=%u)",
- fop->config.version, fop->config.algorithm,
- fop->config.gf_word_size, fop->config.bricks,
- fop->config.redundancy, fop->config.chunk_size);
- }
+ if (lock->query && !ctx->have_info) {
+ ec_set_needed_flag(lock, link, EC_FLAG_XATTROP);
+ }
- ec_fop_set_error(fop, EIO);
+ if (dirty[EC_DATA_TXN]) {
+ if (!ec_set_needed_flag(lock, link, EC_FLAG_DATA_DIRTY)) {
+ dirty[EC_DATA_TXN] = 0;
+ }
+ }
- return _gf_false;
+ if (dirty[EC_METADATA_TXN]) {
+ if (!ec_set_needed_flag(lock, link, EC_FLAG_METADATA_DIRTY)) {
+ dirty[EC_METADATA_TXN] = 0;
+ }
}
+ newflags = EC_NEEDED_FLAGS(lock->waiting_flags);
- return _gf_true;
+ return oldflags ^ newflags;
}
-int32_t ec_get_size_version_set(call_frame_t * frame, void * cookie,
- xlator_t * this, int32_t op_ret,
- int32_t op_errno, inode_t * inode,
- struct iatt * buf, dict_t * xdata,
- struct iatt * postparent)
+void
+ec_get_size_version(ec_lock_link_t *link)
{
- ec_fop_data_t * fop = cookie;
- ec_inode_t * ctx;
- ec_lock_t *lock = NULL;
+ loc_t loc;
+ ec_lock_t *lock;
+ ec_inode_t *ctx;
+ ec_fop_data_t *fop;
+ dict_t *dict = NULL;
+ dict_t *xdata = NULL;
+ ec_t *ec = NULL;
+ int32_t error = 0;
+ gf_boolean_t set_dirty = _gf_false;
+ uint64_t allzero[EC_VERSION_SIZE] = {0, 0};
+ uint64_t dirty[EC_VERSION_SIZE] = {0, 0};
+ lock = link->lock;
+ ctx = lock->ctx;
+ fop = link->fop;
+ ec = fop->xl->private;
+ uint64_t changed_flags = 0;
- if (op_ret >= 0)
- {
- if ((buf->ia_type == IA_IFREG) && !ec_config_check(fop, xdata)) {
- return 0;
+ if (ec->optimistic_changelog && !(ec->node_mask & ~link->lock->good_mask) &&
+ !ec_is_data_fop(fop->id))
+ link->optimistic_changelog = _gf_true;
+
+ memset(&loc, 0, sizeof(loc));
+
+ LOCK(&lock->loc.inode->lock);
+
+ set_dirty = ec_set_dirty_flag(link, ctx, dirty);
+
+ /* If ec metadata has already been retrieved, do not try again. */
+ if (ctx->have_info) {
+ if (ec_is_data_fop(fop->id)) {
+ fop->healing |= lock->healing;
}
+ if (!set_dirty)
+ goto unlock;
+ }
+
+ /* Determine if there's something we need to retrieve for the current
+ * operation. */
+ if (!set_dirty && !lock->query && (lock->loc.inode->ia_type != IA_IFREG) &&
+ (lock->loc.inode->ia_type != IA_INVAL)) {
+ goto unlock;
+ }
+
+ changed_flags = ec_set_xattrop_flags_and_params(lock, link, dirty);
+ if (link->waiting_flags) {
+ /* This fop needs to wait until all its flags are cleared which
+ * potentially can be cleared by other xattrops that are already
+ * wound*/
+ ec_sleep(fop);
+ } else {
+ GF_ASSERT(!changed_flags);
+ }
- LOCK(&inode->lock);
+unlock:
+ UNLOCK(&lock->loc.inode->lock);
- ctx = __ec_inode_get(inode, this);
- if (ctx != NULL) {
- if (ctx->inode_lock != NULL) {
- lock = ctx->inode_lock;
- lock->version = fop->answer->version;
+ if (!changed_flags)
+ goto out;
- if (buf->ia_type == IA_IFREG) {
- lock->have_size = 1;
- lock->size = buf->ia_size;
- }
+ dict = dict_new();
+ if (dict == NULL) {
+ error = -ENOMEM;
+ goto out;
+ }
+
+ if (EC_FLAGS_HAVE(changed_flags, EC_FLAG_XATTROP)) {
+ /* Once we know that an xattrop will be needed,
+ * we try to get all available information in a
+ * single call. */
+ error = ec_dict_set_array(dict, EC_XATTR_VERSION, allzero,
+ EC_VERSION_SIZE);
+ if (error != 0) {
+ goto out;
+ }
+
+ if (lock->loc.inode->ia_type == IA_IFREG ||
+ lock->loc.inode->ia_type == IA_INVAL) {
+ error = ec_dict_set_number(dict, EC_XATTR_SIZE, 0);
+ if (error == 0) {
+ error = ec_dict_set_number(dict, EC_XATTR_CONFIG, 0);
}
- if (ctx->entry_lock != NULL) {
- lock = ctx->entry_lock;
- lock->version = fop->answer->version;
+ if (error != 0) {
+ goto out;
}
- }
- UNLOCK(&inode->lock);
+ xdata = dict_new();
+ if (xdata == NULL || dict_set_int32(xdata, GF_GET_SIZE, 1)) {
+ error = -ENOMEM;
+ goto out;
+ }
+ }
+ }
- if (lock != NULL)
- {
- // Only update parent mask if the lookup has been made with
- // inode locked.
- fop->parent->mask &= fop->good;
+ if (memcmp(allzero, dirty, sizeof(allzero))) {
+ error = ec_dict_set_array(dict, EC_XATTR_DIRTY, dirty, EC_VERSION_SIZE);
+ if (error != 0) {
+ goto out;
}
+ }
- if (buf->ia_type == IA_IFREG) {
- fop->parent->pre_size = fop->parent->post_size = buf->ia_size;
- fop->parent->have_size = 1;
+ fop->frame->root->uid = 0;
+ fop->frame->root->gid = 0;
+
+ /* For normal fops, ec_[f]xattrop() must succeed on at least
+ * EC_MINIMUM_MIN bricks, however when this is called as part of a
+ * self-heal operation the mask of target bricks (fop->mask) could
+ * contain less than EC_MINIMUM_MIN bricks, causing the xattrop to
+ * always fail. Thus we always use the same minimum used for the main
+ * fop.
+ */
+ if (lock->fd == NULL) {
+ error = ec_loc_from_loc(fop->xl, &loc, &lock->loc);
+ if (error != 0) {
+ goto out;
}
+ if (gf_uuid_is_null(loc.pargfid)) {
+ if (loc.parent != NULL) {
+ inode_unref(loc.parent);
+ loc.parent = NULL;
+ }
+ GF_FREE((char *)loc.path);
+ loc.path = NULL;
+ loc.name = NULL;
+ }
+
+ ec_xattrop(fop->frame, fop->xl, fop->mask, fop->minimum,
+ ec_prepare_update_cbk, link, &loc, GF_XATTROP_ADD_ARRAY64,
+ dict, xdata);
+ } else {
+ ec_fxattrop(fop->frame, fop->xl, fop->mask, fop->minimum,
+ ec_prepare_update_cbk, link, lock->fd,
+ GF_XATTROP_ADD_ARRAY64, dict, xdata);
}
- else
+
+ error = 0;
+
+out:
+ fop->frame->root->uid = fop->uid;
+ fop->frame->root->gid = fop->gid;
+
+ loc_wipe(&loc);
+
+ if (dict != NULL) {
+ dict_unref(dict);
+ }
+
+ if (xdata != NULL) {
+ dict_unref(xdata);
+ }
+
+ if (error != 0) {
+ ec_fop_set_error(fop, -error);
+ }
+}
+
+gf_boolean_t
+__ec_get_inode_size(ec_fop_data_t *fop, inode_t *inode, uint64_t *size)
+{
+ ec_inode_t *ctx;
+ gf_boolean_t found = _gf_false;
+
+ ctx = __ec_inode_get(inode, fop->xl);
+ if (ctx == NULL) {
+ goto out;
+ }
+
+ if (ctx->have_size) {
+ *size = ctx->post_size;
+ found = _gf_true;
+ }
+
+out:
+ return found;
+}
+
+gf_boolean_t
+ec_get_inode_size(ec_fop_data_t *fop, inode_t *inode, uint64_t *size)
+{
+ gf_boolean_t found = _gf_false;
+
+ LOCK(&inode->lock);
{
- gf_log(this->name, GF_LOG_WARNING, "Failed to get size and version "
- "(error %d)", op_errno);
- ec_fop_set_error(fop, op_errno);
+ found = __ec_get_inode_size(fop, inode, size);
}
+ UNLOCK(&inode->lock);
- return 0;
+ return found;
}
gf_boolean_t
-ec_is_data_fop (glusterfs_fop_t fop)
-{
- switch (fop) {
- case GF_FOP_WRITE:
- case GF_FOP_TRUNCATE:
- case GF_FOP_FTRUNCATE:
- case GF_FOP_FALLOCATE:
- case GF_FOP_DISCARD:
- case GF_FOP_ZEROFILL:
- return _gf_true;
- default:
- return _gf_false;
- }
- return _gf_false;
+__ec_set_inode_size(ec_fop_data_t *fop, inode_t *inode, uint64_t size)
+{
+ ec_inode_t *ctx;
+ gf_boolean_t found = _gf_false;
+
+ ctx = __ec_inode_get(inode, fop->xl);
+ if (ctx == NULL) {
+ goto out;
+ }
+
+ /* Normal fops always have ctx->have_size set. However self-heal calls this
+ * to prepare the inode, so ctx->have_size will be false. In this case we
+ * prepare both pre_size and post_size, and set have_size and have_info to
+ * true. */
+ if (!ctx->have_size) {
+ ctx->pre_size = size;
+ ctx->have_size = ctx->have_info = _gf_true;
+ }
+ ctx->post_size = size;
+
+ found = _gf_true;
+
+out:
+ return found;
}
-int32_t
-ec_prepare_update_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *dict, dict_t *xdata)
+gf_boolean_t
+ec_set_inode_size(ec_fop_data_t *fop, inode_t *inode, uint64_t size)
{
- ec_fop_data_t *fop = cookie, *parent;
- ec_lock_t *lock = NULL;
+ gf_boolean_t found = _gf_false;
- if (op_ret >= 0) {
- parent = fop->parent;
- while ((parent != NULL) && (parent->locks[0].lock == NULL)) {
- parent = parent->parent;
- }
- if (parent == NULL) {
- return 0;
- }
+ LOCK(&inode->lock);
+ {
+ found = __ec_set_inode_size(fop, inode, size);
+ }
+ UNLOCK(&inode->lock);
- lock = parent->locks[0].lock;
- lock->is_dirty = _gf_true;
+ return found;
+}
- if (!ec_config_check(fop, dict)) {
- return 0;
- }
+static void
+ec_release_stripe_cache(ec_inode_t *ctx)
+{
+ ec_stripe_list_t *stripe_cache = NULL;
+ ec_stripe_t *stripe = NULL;
- LOCK(&lock->loc.inode->lock);
+ stripe_cache = &ctx->stripe_cache;
+ while (!list_empty(&stripe_cache->lru)) {
+ stripe = list_first_entry(&stripe_cache->lru, ec_stripe_t, lru);
+ list_del(&stripe->lru);
+ GF_FREE(stripe);
+ }
+ stripe_cache->count = 0;
+ stripe_cache->max = 0;
+}
- if ((ec_dict_del_number(dict, EC_XATTR_VERSION, &lock->version) != 0) ||
- (ec_dict_del_number(dict, EC_XATTR_SIZE, &lock->size) != 0)) {
- UNLOCK(&lock->loc.inode->lock);
+void
+ec_clear_inode_info(ec_fop_data_t *fop, inode_t *inode)
+{
+ ec_inode_t *ctx;
- ec_fop_set_error(fop, EIO);
+ LOCK(&inode->lock);
- return 0;
- }
+ ctx = __ec_inode_get(inode, fop->xl);
+ if (ctx == NULL) {
+ goto unlock;
+ }
- lock->have_size = 1;
+ ec_release_stripe_cache(ctx);
+ ctx->have_info = _gf_false;
+ ctx->have_config = _gf_false;
+ ctx->have_version = _gf_false;
+ ctx->have_size = _gf_false;
- UNLOCK(&lock->loc.inode->lock);
+ memset(&ctx->config, 0, sizeof(ctx->config));
+ memset(ctx->pre_version, 0, sizeof(ctx->pre_version));
+ memset(ctx->post_version, 0, sizeof(ctx->post_version));
+ ctx->pre_size = ctx->post_size = 0;
+ memset(ctx->dirty, 0, sizeof(ctx->dirty));
- fop->parent->mask &= fop->good;
- /*As of now only data healing marks bricks as healing*/
- if (ec_is_data_fop (fop->parent->id))
- fop->parent->healing |= fop->healing;
+unlock:
+ UNLOCK(&inode->lock);
+}
+
+int32_t
+ec_get_real_size_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent)
+{
+ ec_fop_data_t *fop = cookie;
+ ec_lock_link_t *link;
- fop->parent->pre_size = fop->parent->post_size = lock->size;
- fop->parent->have_size = 1;
+ if (op_ret >= 0) {
+ link = fop->data;
+ link->size = buf->ia_size;
} else {
- gf_log(this->name, GF_LOG_WARNING,
- "Failed to get size and version (error %d: %s)", op_errno,
- strerror (op_errno));
- ec_fop_set_error(fop, op_errno);
+ /* Prevent failure of parent fop. */
+ fop->error = 0;
}
return 0;
}
-void ec_get_size_version(ec_fop_data_t * fop)
+/* This function is used to get the trusted.ec.size xattr from a file when
+ * no lock is needed on the inode. This is only required to maintain iatt
+ * structs on fops that manipulate directory entries but do not operate
+ * directly on the inode, like link, rename, ...
+ *
+ * Any error processing this request is ignored. In the worst case, an invalid
+ * or not up to date value in the iatt could cause some cache invalidation.
+ */
+void
+ec_get_real_size(ec_lock_link_t *link)
{
- loc_t loc;
- dict_t * xdata;
- uid_t uid;
- gid_t gid;
- int32_t error = ENOMEM;
+ ec_fop_data_t *fop;
+ dict_t *xdata;
- if (fop->have_size)
- {
+ if (link->base == NULL || link->base->inode == NULL) {
return;
}
- if ((fop->parent != NULL) && fop->parent->have_size)
- {
- fop->pre_size = fop->parent->pre_size;
- fop->post_size = fop->parent->post_size;
+ if (link->base->inode->ia_type != IA_IFREG) {
+ return;
+ }
- fop->have_size = 1;
+ fop = link->fop;
+ if (ec_get_inode_size(fop, link->base->inode, &link->size)) {
return;
}
- memset(&loc, 0, sizeof(loc));
-
xdata = dict_new();
- if (xdata == NULL)
- {
- goto out;
+ if (xdata == NULL) {
+ return;
}
- if ((dict_set_uint64(xdata, EC_XATTR_VERSION, 0) != 0) ||
- (dict_set_uint64(xdata, EC_XATTR_SIZE, 0) != 0) ||
- (dict_set_uint64(xdata, EC_XATTR_CONFIG, 0) != 0) ||
- (dict_set_uint64(xdata, EC_XATTR_DIRTY, 0) != 0))
- {
+ if (ec_dict_set_number(xdata, EC_XATTR_SIZE, 0) != 0) {
goto out;
}
- uid = fop->frame->root->uid;
- gid = fop->frame->root->gid;
+ /* Send a simple lookup. A single answer is considered ok since this value
+ * is only used to return an iatt struct related to an inode that is not
+ * locked and have not suffered any operation. */
+ ec_lookup(fop->frame, fop->xl, fop->mask, 1, ec_get_real_size_cbk, link,
+ link->base, xdata);
- fop->frame->root->uid = 0;
- fop->frame->root->gid = 0;
+out:
+ if (xdata != NULL) {
+ dict_unref(xdata);
+ }
+}
+
+static void
+ec_lock_update_fd(ec_lock_t *lock, ec_fop_data_t *fop)
+{
+ /* If the fop has an fd available, attach it to the lock structure to be
+ * able to do fxattrop calls instead of xattrop. */
+ if (fop->use_fd && (lock->fd == NULL)) {
+ lock->fd = __fd_ref(fop->fd);
+ }
+}
- error = EIO;
+static gf_boolean_t
+ec_link_has_lock_conflict(ec_lock_link_t *link, gf_boolean_t waitlist_check)
+{
+ ec_lock_link_t *trav_link = NULL;
- if (!fop->use_fd)
+ list_for_each_entry(trav_link, &link->lock->owners, owner_list)
{
- if (ec_loc_from_loc(fop->xl, &loc, &fop->loc[0]) != 0)
- {
- goto out;
+ if (ec_lock_conflict(trav_link, link))
+ return _gf_true;
+ }
+
+ if (!waitlist_check)
+ return _gf_false;
+
+ list_for_each_entry(trav_link, &link->lock->waiting, wait_list)
+ {
+ if (ec_lock_conflict(trav_link, link))
+ return _gf_true;
+ }
+
+ return _gf_false;
+}
+
+static void
+ec_lock_wake_shared(ec_lock_t *lock, struct list_head *list)
+{
+ ec_fop_data_t *fop;
+ ec_lock_link_t *link;
+ gf_boolean_t conflict = _gf_false;
+
+ while (!conflict && !list_empty(&lock->waiting)) {
+ link = list_entry(lock->waiting.next, ec_lock_link_t, wait_list);
+ fop = link->fop;
+
+ /* If lock is not acquired, at most one fop can be assigned as owner.
+ * The following fops will need to wait in the lock->waiting queue
+ * until the lock has been fully acquired. */
+ conflict = !lock->acquired;
+
+ /* If the fop is not shareable, only this fop can be assigned as owner.
+ * Other fops will need to wait until this one finishes. */
+ if (ec_link_has_lock_conflict(link, _gf_false)) {
+ conflict = _gf_true;
}
- if (gf_uuid_is_null(loc.pargfid))
- {
- if (loc.parent != NULL)
- {
- inode_unref(loc.parent);
- loc.parent = NULL;
- }
- GF_FREE((char *)loc.path);
- loc.path = NULL;
- loc.name = NULL;
+
+ /* If only one fop is allowed, it can be assigned as the owner of the
+ * lock only if there weren't any other owner. */
+ if (conflict && !list_empty(&lock->owners)) {
+ break;
}
- } else if (ec_loc_from_fd(fop->xl, &loc, fop->fd) != 0) {
- goto out;
+
+ list_move_tail(&link->wait_list, list);
+
+ list_add_tail(&link->owner_list, &lock->owners);
+ lock->refs_owners++;
+
+ ec_lock_update_fd(lock, fop);
}
+}
- /* For normal fops, ec_lookup() must succeed on at least EC_MINIMUM_MIN
- * bricks, however when this is called as part of a self-heal operation
- * the mask of target bricks (fop->mask) could contain less than
- * EC_MINIMUM_MIN bricks, causing the lookup to always fail. Thus we
- * always use the same minimum used for the main fop.
- */
- ec_lookup(fop->frame, fop->xl, fop->mask, fop->minimum,
- ec_get_size_version_set, NULL, &loc, xdata);
+static void
+ec_lock_apply(ec_lock_link_t *link)
+{
+ ec_fop_data_t *fop = link->fop;
- fop->frame->root->uid = uid;
- fop->frame->root->gid = gid;
+ fop->mask &= link->lock->good_mask;
+ fop->locked++;
- error = 0;
+ ec_get_size_version(link);
+ ec_get_real_size(link);
+}
-out:
- loc_wipe(&loc);
+gf_boolean_t
+ec_lock_acquire(ec_lock_link_t *link);
- if (xdata != NULL)
- {
- dict_unref(xdata);
- }
+static void
+ec_lock_resume_shared(struct list_head *list)
+{
+ ec_lock_link_t *link;
- if (error != 0) {
- ec_fop_set_error(fop, error);
+ while (!list_empty(list)) {
+ link = list_entry(list->next, ec_lock_link_t, wait_list);
+ list_del_init(&link->wait_list);
+
+ if (link->lock->acquired) {
+ ec_lock_apply(link);
+ ec_lock(link->fop);
+ } else {
+ GF_ASSERT(list_empty(list));
+
+ ec_lock_acquire(link);
+ }
+
+ ec_resume(link->fop, 0);
}
}
-void ec_prepare_update(ec_fop_data_t *fop)
+void
+ec_lock_acquired(ec_lock_link_t *link)
{
- loc_t loc;
- dict_t *xdata;
- ec_fop_data_t *tmp;
+ struct list_head list;
ec_lock_t *lock;
- uid_t uid;
- gid_t gid;
- int32_t error = ENOMEM;
+ ec_fop_data_t *fop;
+
+ lock = link->lock;
+ fop = link->fop;
+
+ ec_trace("LOCKED", fop, "lock=%p", lock);
- tmp = fop;
- while ((tmp != NULL) && (tmp->locks[0].lock == NULL)) {
- tmp = tmp->parent;
+ INIT_LIST_HEAD(&list);
+
+ LOCK(&lock->loc.inode->lock);
+
+ lock->acquired = _gf_true;
+ if (lock->contention) {
+ lock->release = _gf_true;
+ lock->contention = _gf_false;
}
- if ((tmp != NULL) && tmp->locks[0].lock->is_dirty) {
- lock = tmp->locks[0].lock;
- fop->pre_size = fop->post_size = lock->size;
- fop->have_size = 1;
+ ec_lock_update_fd(lock, fop);
+ ec_lock_wake_shared(lock, &list);
- return;
+ UNLOCK(&lock->loc.inode->lock);
+
+ ec_lock_apply(link);
+
+ if (fop->use_fd &&
+ (link->update[EC_DATA_TXN] || link->update[EC_METADATA_TXN])) {
+ /* Try to reopen closed fd's only if lock has succeeded. */
+ ec_fix_open(fop, lock->mask);
}
- memset(&loc, 0, sizeof(loc));
+ ec_lock_resume_shared(&list);
+}
- xdata = dict_new();
- if (xdata == NULL) {
- goto out;
+int32_t
+ec_locked(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ ec_fop_data_t *fop = cookie;
+ ec_lock_link_t *link = NULL;
+ ec_lock_t *lock = NULL;
+
+ link = fop->data;
+ lock = link->lock;
+ if (op_ret >= 0) {
+ lock->mask = lock->good_mask = fop->good;
+ lock->healing = 0;
+
+ ec_lock_acquired(link);
+ ec_lock(fop->parent);
+ } else {
+ LOCK(&lock->loc.inode->lock);
+ {
+ lock->contention = _gf_false;
+ }
+ UNLOCK(&lock->loc.inode->lock);
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, EC_MSG_PREOP_LOCK_FAILED,
+ "Failed to complete preop lock");
}
- if ((ec_dict_set_number(xdata, EC_XATTR_VERSION, 0) != 0) ||
- (ec_dict_set_number(xdata, EC_XATTR_SIZE, 0) != 0) ||
- (ec_dict_set_number(xdata, EC_XATTR_CONFIG, 0) != 0) ||
- (ec_dict_set_number(xdata, EC_XATTR_DIRTY, 1) != 0)) {
- goto out;
+
+ return 0;
+}
+
+gf_boolean_t
+ec_lock_acquire(ec_lock_link_t *link)
+{
+ ec_lock_t *lock;
+ ec_fop_data_t *fop;
+ gf_lkowner_t lk_owner;
+
+ lock = link->lock;
+ fop = link->fop;
+
+ if (!lock->acquired) {
+ set_lk_owner_from_ptr(&lk_owner, lock);
+
+ ec_trace("LOCK_ACQUIRE", fop, "lock=%p, inode=%p", lock,
+ lock->loc.inode);
+
+ lock->flock.l_type = F_WRLCK;
+ ec_inodelk(fop->frame, fop->xl, &lk_owner, -1, EC_MINIMUM_ALL,
+ ec_locked, link, fop->xl->name, &lock->loc, F_SETLKW,
+ &lock->flock, NULL);
+
+ return _gf_false;
}
- uid = fop->frame->root->uid;
- gid = fop->frame->root->gid;
+ ec_trace("LOCK_REUSE", fop, "lock=%p", lock);
- fop->frame->root->uid = 0;
- fop->frame->root->gid = 0;
+ ec_lock_acquired(link);
- error = EIO;
+ return _gf_true;
+}
- if (!fop->use_fd) {
- if (ec_loc_from_loc(fop->xl, &loc, &fop->loc[0]) != 0) {
- goto out;
- }
+static ec_lock_link_t *
+ec_lock_timer_cancel(xlator_t *xl, ec_lock_t *lock)
+{
+ ec_lock_link_t *timer_link;
- ec_xattrop(fop->frame, fop->xl, fop->mask, fop->minimum,
- ec_prepare_update_cbk, NULL, &loc, GF_XATTROP_ADD_ARRAY64,
- xdata, NULL);
+ /* If we don't have any timer, there's nothing to cancel. */
+ if (lock->timer == NULL) {
+ return NULL;
+ }
+
+ /* We are trying to access a lock that has an unlock timer active.
+ * This means that the lock must be idle, i.e. no fop can be in the
+ * owner, waiting or frozen lists. It also means that the lock cannot
+ * have been marked as being released (this is done without timers).
+ * There should only be one owner reference, but it's possible that
+ * some fops are being prepared to use this lock. */
+ GF_ASSERT((lock->refs_owners == 1) && list_empty(&lock->owners) &&
+ list_empty(&lock->waiting));
+
+ /* We take the timer_link before cancelling the timer, since a
+ * successful cancellation will destroy it. It must not be NULL
+ * because it references the fop responsible for the delayed unlock
+ * that we are currently trying to cancel. */
+ timer_link = lock->timer->data;
+ GF_ASSERT(timer_link != NULL);
+
+ if (gf_timer_call_cancel(xl->ctx, lock->timer) < 0) {
+ /* It's too late to avoid the execution of the timer callback.
+ * Since we need to be sure that the callback has access to all
+ * needed resources, we cannot resume the execution of the
+ * timer fop now. This will be done in the callback. */
+ timer_link = NULL;
} else {
- ec_fxattrop(fop->frame, fop->xl, fop->mask, fop->minimum,
- ec_prepare_update_cbk, NULL, fop->fd,
- GF_XATTROP_ADD_ARRAY64, xdata, NULL);
+ /* The timer has been cancelled. The fop referenced by
+ * timer_link holds the last reference. The caller is
+ * responsible to release it when not needed anymore. */
+ ec_trace("UNLOCK_CANCELLED", timer_link->fop, "lock=%p", lock);
+ }
+
+ /* We have two options here:
+ *
+ * 1. The timer has been successfully cancelled.
+ *
+ * This is the easiest case and we can continue with the currently
+ * acquired lock.
+ *
+ * 2. The timer callback has already been fired.
+ *
+ * In this case we have not been able to cancel the timer before
+ * the timer callback has been fired, but we also know that
+ * lock->timer != NULL. This means that the timer callback is still
+ * trying to acquire the inode mutex that we currently own. We are
+ * safe until we release it. In this case we can safely clear
+ * lock->timer. This will cause that the timer callback does nothing
+ * once it acquires the mutex.
+ */
+ lock->timer = NULL;
+
+ return timer_link;
+}
+
+static gf_boolean_t
+ec_lock_assign_owner(ec_lock_link_t *link)
+{
+ ec_fop_data_t *fop;
+ ec_lock_t *lock;
+ ec_lock_link_t *timer_link = NULL;
+ gf_boolean_t assigned = _gf_false;
+
+ /* The link cannot be in any list because we have just finished preparing
+ * it. */
+ GF_ASSERT(list_empty(&link->wait_list));
+
+ fop = link->fop;
+ lock = link->lock;
+
+ LOCK(&lock->loc.inode->lock);
+
+ /* Since the link has just been prepared but it's not active yet, the
+ * refs_pending must be one at least (the ref owned by this link). */
+ GF_ASSERT(lock->refs_pending > 0);
+ /* The link is not pending any more. It will be assigned to the owner,
+ * waiting or frozen list. */
+ lock->refs_pending--;
+
+ if (lock->release) {
+ ec_trace("LOCK_QUEUE_FREEZE", fop, "lock=%p", lock);
+
+ /* When lock->release is set, we'll unlock the lock as soon as
+ * possible, meaning that we won't use a timer. */
+ GF_ASSERT(lock->timer == NULL);
+
+ /* The lock is marked to be released. We can still have owners and fops
+ * in the waiting ilist f they have been added before the lock has been
+ * marked to be released. However new fops are put into the frozen list
+ * to wait for the next unlock/lock cycle. */
+ list_add_tail(&link->wait_list, &lock->frozen);
+
+ goto unlock;
}
- fop->frame->root->uid = uid;
- fop->frame->root->gid = gid;
+ /* The lock is not marked to be released, so the frozen list should be
+ * empty. */
+ GF_ASSERT(list_empty(&lock->frozen));
- error = 0;
+ timer_link = ec_lock_timer_cancel(fop->xl, lock);
-out:
- loc_wipe(&loc);
+ if (!list_empty(&lock->owners)) {
+ /* There are other owners of this lock. We can only take ownership if
+ * the lock is already acquired and doesn't have conflict with existing
+ * owners, or waiters(to prevent starvation).
+ * Otherwise we need to wait.
+ */
+ if (!lock->acquired || ec_link_has_lock_conflict(link, _gf_true)) {
+ ec_trace("LOCK_QUEUE_WAIT", fop, "lock=%p", lock);
- if (xdata != NULL) {
- dict_unref(xdata);
+ list_add_tail(&link->wait_list, &lock->waiting);
+
+ goto unlock;
+ }
}
- if (error != 0) {
- ec_fop_set_error(fop, error);
+ list_add_tail(&link->owner_list, &lock->owners);
+
+ /* If timer_link is not NULL, it means that we have inherited the owner
+ * reference assigned to the timer fop. In this case we simply reuse it.
+ * Otherwise we need to increase the number of owners. */
+ if (timer_link == NULL) {
+ lock->refs_owners++;
+ }
+
+ assigned = _gf_true;
+
+unlock:
+ if (!assigned) {
+ /* We have not been able to take ownership of this lock. The fop must
+ * be put to sleep. */
+ ec_sleep(fop);
+ }
+
+ UNLOCK(&lock->loc.inode->lock);
+
+ /* If we have cancelled the timer, we need to resume the fop that was
+ * waiting for it. */
+ if (timer_link != NULL) {
+ ec_resume(timer_link->fop, 0);
}
+
+ return assigned;
}
-int32_t ec_unlocked(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+static void
+ec_lock_next_owner(ec_lock_link_t *link, ec_cbk_data_t *cbk,
+ gf_boolean_t release)
{
- ec_fop_data_t *fop = cookie;
+ struct list_head list;
+ ec_lock_t *lock = link->lock;
+ ec_fop_data_t *fop = link->fop;
+ ec_inode_t *ctx = lock->ctx;
- if (op_ret < 0) {
- gf_log(this->name, GF_LOG_WARNING, "entry/inode unlocking failed (%s)",
- ec_fop_name(fop->parent->id));
- } else {
- ec_trace("UNLOCKED", fop->parent, "lock=%p", fop->data);
+ INIT_LIST_HEAD(&list);
+
+ LOCK(&lock->loc.inode->lock);
+
+ ec_trace("LOCK_DONE", fop, "lock=%p", lock);
+
+ /* Current link must belong to the owner list of the lock. We don't
+ * decrement lock->refs_owners here because the inode mutex is released
+ * before ec_unlock() is called and we need to know when the last owner
+ * unlocks the lock to do proper cleanup. lock->refs_owners is used for
+ * this task. */
+ GF_ASSERT((lock->refs_owners > 0) && !list_empty(&link->owner_list));
+ list_del_init(&link->owner_list);
+
+ lock->release |= release;
+
+ if ((fop->error == 0) && (cbk != NULL) && (cbk->op_ret >= 0)) {
+ if (link->update[0]) {
+ ctx->post_version[0]++;
+ }
+ if (link->update[1]) {
+ ctx->post_version[1]++;
+ }
+ /* If the fop fails on any of the good bricks, it is important to mark
+ * it dirty and update versions right away. */
+ if (link->update[0] || link->update[1]) {
+ if (lock->good_mask & ~(fop->good | fop->remaining)) {
+ lock->release = _gf_true;
+ }
+ }
}
- return 0;
+ if (fop->healing) {
+ lock->healing = fop->healing & (fop->good | fop->remaining);
+ }
+ ec_lock_update_good(lock, fop);
+
+ ec_lock_wake_shared(lock, &list);
+
+ UNLOCK(&lock->loc.inode->lock);
+
+ ec_lock_resume_shared(&list);
}
-void ec_unlock_lock(ec_fop_data_t *fop, ec_lock_t *lock)
+void
+ec_lock(ec_fop_data_t *fop)
{
- if ((lock->mask != 0) && lock->acquired) {
- ec_owner_set(fop->frame, lock);
+ ec_lock_link_t *link;
- switch (lock->kind) {
- case EC_LOCK_ENTRY:
- ec_trace("UNLOCK_ENTRYLK", fop, "lock=%p, inode=%p, path=%s", lock,
- lock->loc.inode, lock->loc.path);
+ /* There is a chance that ec_resume is called on fop even before ec_sleep.
+ * Which can result in refs == 0 for fop leading to use after free in this
+ * function when it calls ec_sleep so do ec_sleep at start and ec_resume at
+ * the end of this function.*/
+ ec_sleep(fop);
- ec_entrylk(fop->frame, fop->xl, lock->mask, EC_MINIMUM_ALL,
- ec_unlocked, lock, fop->xl->name, &lock->loc, NULL,
- ENTRYLK_UNLOCK, lock->type, NULL);
+ while (fop->locked < fop->lock_count) {
+ /* Since there are only up to 2 locks per fop, this xor will change
+ * the order of the locks if fop->first_lock is 1. */
+ link = &fop->locks[fop->locked ^ fop->first_lock];
+ if (!ec_lock_assign_owner(link) || !ec_lock_acquire(link)) {
break;
+ }
+ }
- case EC_LOCK_INODE:
- lock->flock.l_type = F_UNLCK;
- ec_trace("UNLOCK_INODELK", fop, "lock=%p, inode=%p", lock,
- lock->loc.inode);
+ ec_resume(fop, 0);
+}
- ec_inodelk(fop->frame, fop->xl, lock->mask, EC_MINIMUM_ALL,
- ec_unlocked, lock, fop->xl->name, &lock->loc, F_SETLK,
- &lock->flock, NULL);
+void
+ec_lock_unfreeze(ec_lock_link_t *link)
+{
+ struct list_head list;
+ ec_lock_t *lock;
+ gf_boolean_t destroy = _gf_false;
- break;
+ lock = link->lock;
- default:
- gf_log(fop->xl->name, GF_LOG_ERROR, "Invalid lock type");
- }
+ INIT_LIST_HEAD(&list);
+
+ LOCK(&lock->loc.inode->lock);
+
+ /* The lock must be marked to be released here, since we have just released
+ * it and any attempt to assign it to more fops must have added them to the
+ * frozen list. We can only have one active reference here: the one that
+ * is processing this unfreeze. */
+ GF_ASSERT(lock->release && (lock->refs_owners == 1));
+ lock->release = _gf_false;
+ lock->refs_owners = 0;
+
+ lock->acquired = _gf_false;
+
+ /* We are unfreezing a lock. This means that the lock has already been
+ * released. In this state it shouldn't have a pending timer nor have any
+ * owner, and the waiting list should be empty. Only the frozen list can
+ * contain some fop. */
+ GF_ASSERT((lock->timer == NULL) && list_empty(&lock->waiting) &&
+ list_empty(&lock->owners));
+
+ /* We move all frozen fops to the waiting list. */
+ list_splice_init(&lock->frozen, &lock->waiting);
+
+ /* If we don't have any fop waiting nor there are any prepared fops using
+ * this lock, we can finally dispose it. */
+ destroy = list_empty(&lock->waiting) && (lock->refs_pending == 0);
+ if (destroy) {
+ ec_trace("LOCK_DESTROY", link->fop, "lock=%p", lock);
+
+ lock->ctx->inode_lock = NULL;
+ } else {
+ ec_trace("LOCK_UNFREEZE", link->fop, "lock=%p", lock);
+
+ ec_lock_wake_shared(lock, &list);
}
- ec_trace("LOCK_DESTROY", fop, "lock=%p", lock);
+ UNLOCK(&lock->loc.inode->lock);
+
+ ec_lock_resume_shared(&list);
- ec_lock_destroy(lock);
+ if (destroy) {
+ ec_lock_destroy(lock);
+ }
}
-int32_t ec_update_size_version_done(call_frame_t * frame, void * cookie,
- xlator_t * this, int32_t op_ret,
- int32_t op_errno, dict_t * xattr,
- dict_t * xdata)
+int32_t
+ec_unlocked(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
{
- ec_fop_data_t * fop = cookie;
+ ec_fop_data_t *fop = cookie;
+ ec_lock_link_t *link = fop->data;
- if (op_ret < 0)
- {
- gf_log(fop->xl->name, GF_LOG_ERROR, "Failed to update version and "
- "size (error %d)", op_errno);
- }
- else
- {
- fop->parent->mask &= fop->good;
+ if (op_ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, EC_MSG_UNLOCK_FAILED,
+ "entry/inode unlocking failed :(%s)", ec_msg_str(link->fop));
+ } else {
+ ec_trace("UNLOCKED", link->fop, "lock=%p", link->lock);
}
- if (fop->data != NULL) {
- ec_unlock_lock(fop->parent, fop->data);
- }
+ ec_lock_unfreeze(link);
return 0;
}
-void ec_update_size_version(ec_fop_data_t *fop, loc_t *loc, uint64_t version,
- uint64_t size, gf_boolean_t dirty, ec_lock_t *lock)
+void
+ec_unlock_lock(ec_lock_link_t *link)
+{
+ ec_lock_t *lock;
+ ec_fop_data_t *fop;
+ gf_lkowner_t lk_owner;
+
+ lock = link->lock;
+ fop = link->fop;
+
+ lock->unlock_now = _gf_false;
+ ec_clear_inode_info(fop, lock->loc.inode);
+
+ if ((lock->mask != 0) && lock->acquired) {
+ set_lk_owner_from_ptr(&lk_owner, lock);
+ lock->flock.l_type = F_UNLCK;
+ ec_trace("UNLOCK_INODELK", fop, "lock=%p, inode=%p", lock,
+ lock->loc.inode);
+
+ ec_inodelk(fop->frame, fop->xl, &lk_owner, lock->mask, EC_MINIMUM_ONE,
+ ec_unlocked, link, fop->xl->name, &lock->loc, F_SETLK,
+ &lock->flock, NULL);
+ } else {
+ ec_lock_unfreeze(link);
+ }
+}
+
+void
+ec_inode_bad_inc(inode_t *inode, xlator_t *xl)
{
- dict_t * dict;
- uid_t uid;
- gid_t gid;
+ ec_inode_t *ctx = NULL;
- if (fop->parent != NULL)
+ LOCK(&inode->lock);
{
- fop->parent->post_size = fop->post_size;
+ ctx = __ec_inode_get(inode, xl);
+ if (ctx == NULL) {
+ goto unlock;
+ }
+ ctx->bad_version++;
+ }
+unlock:
+ UNLOCK(&inode->lock);
+}
- return;
+int32_t
+ec_update_size_version_done(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xattr,
+ dict_t *xdata)
+{
+ ec_fop_data_t *fop = cookie;
+ ec_lock_link_t *link;
+ ec_lock_t *lock;
+ ec_inode_t *ctx;
+
+ link = fop->data;
+ lock = link->lock;
+ ctx = lock->ctx;
+
+ if (op_ret < 0) {
+ if (link->lock->fd == NULL) {
+ ec_inode_bad_inc(link->lock->loc.inode, this);
+ } else {
+ ec_inode_bad_inc(link->lock->fd->inode, this);
+ }
+
+ gf_msg(fop->xl->name, fop_log_level(fop->id, op_errno), op_errno,
+ EC_MSG_SIZE_VERS_UPDATE_FAIL,
+ "Failed to update version and size. %s", ec_msg_str(fop));
+ } else {
+ fop->parent->good &= fop->good;
+
+ ec_lock_update_good(lock, fop);
+
+ if (ec_dict_del_array(xattr, EC_XATTR_VERSION, ctx->post_version,
+ EC_VERSION_SIZE) == 0) {
+ ctx->pre_version[0] = ctx->post_version[0];
+ ctx->pre_version[1] = ctx->post_version[1];
+
+ ctx->have_version = _gf_true;
+ }
+ if (ec_dict_del_number(xattr, EC_XATTR_SIZE, &ctx->post_size) == 0) {
+ ctx->pre_size = ctx->post_size;
+
+ ctx->have_size = _gf_true;
+ }
+ if ((ec_dict_del_config(xdata, EC_XATTR_CONFIG, &ctx->config) == 0) &&
+ ec_config_check(fop->xl, &ctx->config)) {
+ ctx->have_config = _gf_true;
+ }
+
+ ctx->have_info = _gf_true;
}
+ /* If we are here because of fop's and other than unlock request,
+ * that means we are still holding a lock. That make sure
+ * lock->unlock_now can not be modified.
+ */
+ if (lock->unlock_now) {
+ ec_unlock_lock(fop->data);
+ }
+
+ return 0;
+}
+
+void
+ec_update_size_version(ec_lock_link_t *link, uint64_t *version, uint64_t size,
+ uint64_t *dirty)
+{
+ ec_fop_data_t *fop;
+ ec_lock_t *lock;
+ ec_inode_t *ctx;
+ dict_t *dict = NULL;
+ uintptr_t update_on = 0;
+ int32_t err = -ENOMEM;
+
+ fop = link->fop;
+ lock = link->lock;
+ ctx = lock->ctx;
- ec_trace("UPDATE", fop, "version=%ld, size=%ld, dirty=%u", version, size,
- dirty);
+ ec_trace("UPDATE", fop, "version=%ld/%ld, size=%ld, dirty=%ld/%ld",
+ version[0], version[1], size, dirty[0], dirty[1]);
dict = dict_new();
- if (dict == NULL)
- {
+ if (dict == NULL) {
goto out;
}
- if (version != 0) {
- if (ec_dict_set_number(dict, EC_XATTR_VERSION, version) != 0) {
+ /* If we don't have version information or it has been modified, we
+ * update it. */
+ if (!ctx->have_version || (version[0] != 0) || (version[1] != 0)) {
+ err = ec_dict_set_array(dict, EC_XATTR_VERSION, version,
+ EC_VERSION_SIZE);
+ if (err != 0) {
goto out;
}
}
+
if (size != 0) {
- if (ec_dict_set_number(dict, EC_XATTR_SIZE, size) != 0) {
+ /* If size has been changed, we should already
+ * know the previous size of the file. */
+ GF_ASSERT(ctx->have_size);
+
+ err = ec_dict_set_number(dict, EC_XATTR_SIZE, size);
+ if (err != 0) {
goto out;
}
}
- if (dirty) {
- if (ec_dict_set_number(dict, EC_XATTR_DIRTY, -1) != 0) {
+
+ if (dirty[0] || dirty[1]) {
+ err = ec_dict_set_array(dict, EC_XATTR_DIRTY, dirty, EC_VERSION_SIZE);
+ if (err != 0) {
goto out;
}
}
- uid = fop->frame->root->uid;
- gid = fop->frame->root->gid;
+ /* If config information is not known, we request it now. */
+ if ((lock->loc.inode->ia_type == IA_IFREG) && !ctx->have_config) {
+ /* A failure requesting this xattr is ignored because it's not
+ * absolutely required right now. */
+ (void)ec_dict_set_number(dict, EC_XATTR_CONFIG, 0);
+ }
fop->frame->root->uid = 0;
fop->frame->root->gid = 0;
- if (fop->use_fd) {
- ec_fxattrop(fop->frame, fop->xl, fop->mask, EC_MINIMUM_MIN,
- ec_update_size_version_done, lock, fop->fd,
- GF_XATTROP_ADD_ARRAY64, dict, NULL);
+ update_on = lock->good_mask | lock->healing;
+
+ if (link->lock->fd == NULL) {
+ ec_xattrop(fop->frame, fop->xl, update_on, EC_MINIMUM_MIN,
+ ec_update_size_version_done, link, &link->lock->loc,
+ GF_XATTROP_ADD_ARRAY64, dict, NULL);
} else {
- ec_xattrop(fop->frame, fop->xl, fop->mask, EC_MINIMUM_MIN,
- ec_update_size_version_done, lock, loc,
- GF_XATTROP_ADD_ARRAY64, dict, NULL);
+ ec_fxattrop(fop->frame, fop->xl, update_on, EC_MINIMUM_MIN,
+ ec_update_size_version_done, link, link->lock->fd,
+ GF_XATTROP_ADD_ARRAY64, dict, NULL);
}
- fop->frame->root->uid = uid;
- fop->frame->root->gid = gid;
+ fop->frame->root->uid = fop->uid;
+ fop->frame->root->gid = fop->gid;
dict_unref(dict);
return;
out:
- if (dict != NULL)
- {
+ if (dict != NULL) {
dict_unref(dict);
}
- ec_fop_set_error(fop, EIO);
+ ec_fop_set_error(fop, -err);
- gf_log(fop->xl->name, GF_LOG_ERROR, "Unable to update version and size");
+ gf_msg(fop->xl->name, GF_LOG_ERROR, -err, EC_MSG_SIZE_VERS_UPDATE_FAIL,
+ "Unable to update version and size. %s", ec_msg_str(fop));
+
+ if (lock->unlock_now) {
+ ec_unlock_lock(fop->data);
+ }
}
-void ec_unlock_now(ec_fop_data_t *fop, ec_lock_t *lock)
+gf_boolean_t
+ec_update_info(ec_lock_link_t *link)
{
- ec_trace("UNLOCK_NOW", fop, "lock=%p", lock);
+ ec_lock_t *lock;
+ ec_inode_t *ctx;
+ uint64_t version[2] = {0, 0};
+ uint64_t dirty[2] = {0, 0};
+ uint64_t size;
+ ec_t *ec = NULL;
+ uintptr_t mask;
- if ((lock->version_delta != 0) || lock->is_dirty) {
- ec_update_size_version(fop, &lock->loc, lock->version_delta,
- lock->size_delta, lock->is_dirty, lock);
- } else {
- ec_unlock_lock(fop, lock);
+ lock = link->lock;
+ ctx = lock->ctx;
+ ec = link->fop->xl->private;
+
+ /* pre_version[*] will be 0 if have_version is false */
+ version[EC_DATA_TXN] = ctx->post_version[EC_DATA_TXN] -
+ ctx->pre_version[EC_DATA_TXN];
+ version[EC_METADATA_TXN] = ctx->post_version[EC_METADATA_TXN] -
+ ctx->pre_version[EC_METADATA_TXN];
+
+ size = ctx->post_size - ctx->pre_size;
+ /* If we set the dirty flag for update fop, we have to unset it.
+ * If fop has failed on some bricks, leave the dirty as marked. */
+
+ if (lock->unlock_now) {
+ if (version[EC_DATA_TXN]) {
+ /*A data fop will have difference in post and pre version
+ *and for data fop we send writes on healing bricks also */
+ mask = lock->good_mask | lock->healing;
+ } else {
+ mask = lock->good_mask;
+ }
+ /* Ensure that nodes are up while doing final
+ * metadata update.*/
+ if (!(ec->node_mask & ~(mask)) && !(ec->node_mask & ~ec->xl_up)) {
+ if (ctx->dirty[EC_DATA_TXN] != 0) {
+ dirty[EC_DATA_TXN] = -1;
+ }
+ if (ctx->dirty[EC_METADATA_TXN] != 0) {
+ dirty[EC_METADATA_TXN] = -1;
+ }
+ /*If everything is fine and we already
+ *have version xattr set on entry, there
+ *is no need to update version again*/
+ if (ctx->pre_version[EC_DATA_TXN]) {
+ version[EC_DATA_TXN] = 0;
+ }
+ if (ctx->pre_version[EC_METADATA_TXN]) {
+ version[EC_METADATA_TXN] = 0;
+ }
+ } else {
+ link->optimistic_changelog = _gf_false;
+ ec_set_dirty_flag(link, ctx, dirty);
+ }
+ memset(ctx->dirty, 0, sizeof(ctx->dirty));
}
- ec_resume(fop, 0);
+ if ((version[EC_DATA_TXN] != 0) || (version[EC_METADATA_TXN] != 0) ||
+ (dirty[EC_DATA_TXN] != 0) || (dirty[EC_METADATA_TXN] != 0)) {
+ ec_update_size_version(link, version, size, dirty);
+ return _gf_true;
+ }
+
+ return _gf_false;
}
-void ec_unlock_timer_cbk(void *data)
+void
+ec_unlock_now(ec_lock_link_t *link)
{
- ec_lock_link_t *link = data;
- ec_lock_t *lock = link->lock;
- ec_fop_data_t *fop = NULL;
+ ec_lock_t *lock;
+ lock = link->lock;
- LOCK(&lock->loc.inode->lock);
+ ec_trace("UNLOCK_NOW", link->fop, "lock=%p", link->lock);
+ /*At this point, lock is not being used by any fop and
+ *can not be reused by any fop as it is going to be released.
+ *lock->unlock_now can not be modified at any other place.
+ */
+ lock->unlock_now = _gf_true;
- if (lock->timer != NULL) {
- fop = link->fop;
+ if (!ec_update_info(link)) {
+ ec_unlock_lock(link);
+ }
- ec_trace("UNLOCK_DELAYED", fop, "lock=%p", lock);
+ ec_resume(link->fop, 0);
+}
- GF_ASSERT(lock->refs == 1);
+void
+ec_lock_release(ec_t *ec, inode_t *inode)
+{
+ ec_lock_t *lock;
+ ec_inode_t *ctx;
+ ec_lock_link_t *timer_link = NULL;
+
+ LOCK(&inode->lock);
+
+ ctx = __ec_inode_get(inode, ec->xl);
+ if (ctx == NULL) {
+ goto done;
+ }
+ lock = ctx->inode_lock;
+ if ((lock == NULL) || lock->release) {
+ goto done;
+ }
+
+ gf_msg_debug(ec->xl->name, 0, "Releasing inode %p due to lock contention",
+ inode);
+
+ if (!lock->acquired) {
+ /* This happens if some bricks already got the lock while inodelk is in
+ * progress. Set release to true after lock is acquired*/
+ lock->contention = _gf_true;
+ goto done;
+ }
+
+ /* The lock is not marked to be released, so the frozen list should be
+ * empty. */
+ GF_ASSERT(list_empty(&lock->frozen));
+
+ timer_link = ec_lock_timer_cancel(ec->xl, lock);
+
+ /* We mark the lock to be released as soon as possible. */
+ lock->release = _gf_true;
+
+done:
+ UNLOCK(&inode->lock);
+
+ /* If we have cancelled the timer, we need to start the unlock of the
+ * inode. If there was a timer but we have been unable to cancel it
+ * because it was just triggered, the timer callback will take care
+ * of releasing the inode. */
+ if (timer_link != NULL) {
+ ec_unlock_now(timer_link);
+ }
+}
- gf_timer_call_cancel(fop->xl->ctx, lock->timer);
+void
+ec_unlock_timer_add(ec_lock_link_t *link);
+
+void
+ec_unlock_timer_del(ec_lock_link_t *link)
+{
+ ec_lock_t *lock;
+ inode_t *inode;
+ gf_boolean_t now = _gf_false;
+
+ /* If we are here, it means that the timer has expired before having
+ * been cancelled. This guarantees that 'link' is still valid because
+ * the fop that contains it must be pending (if timer cancellation in
+ * ec_lock_assign_owner() fails, the fop is left sleeping).
+ *
+ * At the same time, the fop still has a reference to the lock, so
+ * it must also be valid.
+ */
+ lock = link->lock;
+
+ /* 'lock' must have a valid inode since it can only be destroyed
+ * when the lock itself is destroyed, but we have a reference to the
+ * lock to avoid this.
+ */
+ inode = lock->loc.inode;
+
+ LOCK(&inode->lock);
+
+ if (lock->timer != NULL) {
+ ec_trace("UNLOCK_DELAYED", link->fop, "lock=%p", lock);
+
+ /* The unlock timer has expired without anyone cancelling it.
+ * This means that it shouldn't have any owner, and the waiting
+ * and frozen lists should be empty. It must have only one
+ * owner reference, but there can be fops being prepared
+ * though.
+ * */
+ GF_ASSERT(!lock->release && (lock->refs_owners == 1) &&
+ list_empty(&lock->owners) && list_empty(&lock->waiting) &&
+ list_empty(&lock->frozen));
+
+ gf_timer_call_cancel(link->fop->xl->ctx, lock->timer);
lock->timer = NULL;
- *lock->plock = NULL;
+
+ /* Any fop being processed from now on, will need to wait
+ * until the next unlock/lock cycle. */
+ lock->release = now = _gf_true;
}
- UNLOCK(&lock->loc.inode->lock);
+ UNLOCK(&inode->lock);
- if (fop != NULL) {
- ec_unlock_now(fop, lock);
+ if (now) {
+ ec_unlock_now(link);
+ } else {
+ /* The timer has been cancelled just after firing it but before
+ * getting here. This means that another fop has used the lock
+ * and everything should be handled as if this callback were
+ * have not been executed. However we still have an owner
+ * reference.
+ *
+ * We need to release our reference. If this is not the last
+ * reference (the most common case because another fop has
+ * taken another ref) we only need to decrement the counter.
+ * Otherwise we have been delayed enough so that the other fop
+ * has had time to acquire the reference, do its operation and
+ * release it. At the time of releasing it, the fop did found
+ * that the ref counter was > 1 (our reference), so the delayed
+ * unlock timer wasn't started. We need to start it again if we
+ * are the last reference.
+ *
+ * ec_unlock_timer_add() handles both cases.
+ */
+ ec_unlock_timer_add(link);
+
+ /* We need to resume the fop that was waiting for the delayed
+ * unlock.
+ */
+ ec_resume(link->fop, 0);
+ }
+}
+
+void
+ec_unlock_timer_cbk(void *data)
+{
+ ec_unlock_timer_del(data);
+}
+
+static gf_boolean_t
+ec_eager_lock_used(ec_t *ec, ec_fop_data_t *fop)
+{
+ /* Fops with no locks at this point mean that they are sent as sub-fops
+ * of other higher level fops. In this case we simply assume that the
+ * parent fop will take correct care of the eager lock. */
+ if (fop->lock_count == 0) {
+ return _gf_true;
+ }
+
+ /* We may have more than one lock, but this only happens in the rename
+ * fop, and both locks will reference an inode of the same type (a
+ * directory in this case), so we only need to check the first lock. */
+ if (fop->locks[0].lock->loc.inode->ia_type == IA_IFREG) {
+ return ec->eager_lock;
+ }
+
+ return ec->other_eager_lock;
+}
+
+static uint32_t
+ec_eager_lock_timeout(ec_t *ec, ec_lock_t *lock)
+{
+ if (lock->loc.inode->ia_type == IA_IFREG) {
+ return ec->eager_lock_timeout;
}
+
+ return ec->other_eager_lock_timeout;
}
-void ec_unlock_timer_add(ec_lock_link_t *link)
+static gf_boolean_t
+ec_lock_delay_create(ec_lock_link_t *link)
{
struct timespec delay;
ec_fop_data_t *fop = link->fop;
ec_lock_t *lock = link->lock;
- int32_t refs = 1;
+
+ delay.tv_sec = ec_eager_lock_timeout(fop->xl->private, lock);
+ delay.tv_nsec = 0;
+ lock->timer = gf_timer_call_after(fop->xl->ctx, delay, ec_unlock_timer_cbk,
+ link);
+ if (lock->timer == NULL) {
+ gf_msg(fop->xl->name, GF_LOG_WARNING, ENOMEM,
+ EC_MSG_UNLOCK_DELAY_FAILED, "Unable to delay an unlock");
+
+ return _gf_false;
+ }
+
+ return _gf_true;
+}
+
+void
+ec_unlock_timer_add(ec_lock_link_t *link)
+{
+ ec_fop_data_t *fop = link->fop;
+ ec_lock_t *lock = link->lock;
+ gf_boolean_t now = _gf_false;
LOCK(&lock->loc.inode->lock);
- GF_ASSERT(lock->timer == NULL);
+ /* We are trying to unlock the lock. We can have multiple scenarios here,
+ * but all of them need to have lock->timer == NULL:
+ *
+ * 1. There are other owners currently running that can call ec_unlock().
+ *
+ * None of them can have started the timer until the last one. But this
+ * call should be the consequence of this lastest one.
+ *
+ * 2. There are fops in the waiting or frozen lists.
+ *
+ * These fops cannot call ec_unlock(). So we should be here.
+ *
+ * We must reach here with at least one owner reference.
+ */
+ GF_ASSERT((lock->timer == NULL) && (lock->refs_owners > 0));
+
+ /* If the fop detects that a heal is needed, we mark the lock to be
+ * released as soon as possible. */
+ lock->release |= ec_fop_needs_heal(fop);
- if (lock->refs != 1) {
+ if (lock->refs_owners > 1) {
ec_trace("UNLOCK_SKIP", fop, "lock=%p", lock);
- lock->refs--;
+ /* If there are other owners we cannot do anything else with the lock.
+ * Note that the current fop has already been removed from the owners
+ * list in ec_lock_reuse(). */
+ lock->refs_owners--;
UNLOCK(&lock->loc.inode->lock);
} else if (lock->acquired) {
- delay.tv_sec = 1;
- delay.tv_nsec = 0;
+ /* There are no other owners and the lock is acquired. If there were
+ * fops waiting, at least one of them should have been promoted to an
+ * owner, so the waiting list should be empty. */
+ GF_ASSERT(list_empty(&lock->owners) && list_empty(&lock->waiting));
+ ec_t *ec = fop->xl->private;
+
+ /* If everything goes as expected this fop will be put to sleep until
+ * the timer callback is executed. */
ec_sleep(fop);
- /* If healing is needed, do not delay lock release to let self-heal
- * start working as soon as possible. */
- if (!ec_fop_needs_heal(fop)) {
- ec_trace("UNLOCK_DELAY", fop, "lock=%p", lock);
-
- delay.tv_sec = 1;
- delay.tv_nsec = 0;
- lock->timer = gf_timer_call_after(fop->xl->ctx, delay,
- ec_unlock_timer_cbk, link);
- if (lock->timer == NULL) {
- gf_log(fop->xl->name, GF_LOG_WARNING, "Unable to delay an "
- "unlock");
-
- *lock->plock = NULL;
- refs = 0;
+ /* If the lock needs to be released, or ec is shutting down, do not
+ * delay lock release. */
+ if (!lock->release && !ec->shutdown) {
+ ec_trace("UNLOCK_DELAY", fop, "lock=%p, release=%d", lock,
+ lock->release);
+
+ if (!ec_lock_delay_create(link)) {
+ /* We are unable to create a new timer. We immediately release
+ * the lock. */
+ lock->release = now = _gf_true;
}
+
} else {
- ec_trace("UNLOCK_FORCE", fop, "lock=%p", lock);
- *lock->plock = NULL;
- refs = 0;
+ ec_trace("UNLOCK_FORCE", fop, "lock=%p, release=%d", lock,
+ lock->release);
+ lock->release = now = _gf_true;
}
UNLOCK(&lock->loc.inode->lock);
- if (refs == 0) {
- ec_unlock_now(fop, lock);
+ if (now) {
+ ec_unlock_now(link);
}
} else {
- *lock->plock = NULL;
+ /* There are no owners and the lock is not acquired. This can only
+ * happen if a lock attempt has failed and we get to the unlock step
+ * of the fop. As in the previous case, the waiting list must be
+ * empty. */
+ GF_ASSERT(list_empty(&lock->owners) && list_empty(&lock->waiting));
+
+ /* We need to mark the lock to be released to correctly handle fops
+ * that may get in after we release the inode mutex but before
+ * ec_lock_unfreeze() is processed. */
+ lock->release = _gf_true;
UNLOCK(&lock->loc.inode->lock);
- ec_lock_destroy(lock);
+ ec_lock_unfreeze(link);
}
}
-void ec_unlock(ec_fop_data_t *fop)
+void
+ec_unlock(ec_fop_data_t *fop)
{
int32_t i;
@@ -1559,103 +2839,155 @@ void ec_unlock(ec_fop_data_t *fop)
}
}
-void ec_flush_size_version(ec_fop_data_t * fop)
+void
+ec_flush_size_version(ec_fop_data_t *fop)
{
- ec_lock_t * lock;
- uint64_t version, delta;
-
GF_ASSERT(fop->lock_count == 1);
+ ec_update_info(&fop->locks[0]);
+}
- lock = fop->locks[0].lock;
-
- LOCK(&lock->loc.inode->lock);
-
- GF_ASSERT(lock->owner == fop);
-
- version = lock->version_delta;
- delta = lock->size_delta;
- lock->version_delta = 0;
- lock->size_delta = 0;
-
- UNLOCK(&lock->loc.inode->lock);
+static void
+ec_update_stripe(ec_t *ec, ec_stripe_list_t *stripe_cache, ec_stripe_t *stripe,
+ ec_fop_data_t *fop)
+{
+ off_t base;
+
+ /* On write fops, we only update existing fragments if the write has
+ * succeeded. Otherwise, we remove them from the cache. */
+ if ((fop->id == GF_FOP_WRITE) && (fop->answer != NULL) &&
+ (fop->answer->op_ret >= 0)) {
+ base = stripe->frag_offset - fop->frag_range.first;
+ base *= ec->fragments;
+
+ /* We check if the stripe offset falls inside the real region
+ * modified by the write fop (a write request is allowed,
+ * though uncommon, to write less bytes than requested). The
+ * current write fop implementation doesn't allow partial
+ * writes of fragments, so if there's no error, we are sure
+ * that a full stripe has been completely modified or not
+ * touched at all. The value of op_ret may not be a multiple
+ * of the stripe size because it depends on the requested
+ * size by the user, so we update the stripe if the write has
+ * modified at least one byte (meaning ec has written the full
+ * stripe). */
+ if (base < fop->answer->op_ret + fop->head) {
+ memcpy(stripe->data, fop->vector[0].iov_base + base,
+ ec->stripe_size);
+ list_move_tail(&stripe->lru, &stripe_cache->lru);
+
+ GF_ATOMIC_INC(ec->stats.stripe_cache.updates);
+ }
+ } else {
+ stripe->frag_offset = -1;
+ list_move(&stripe->lru, &stripe_cache->lru);
- if (version > 0)
- {
- ec_update_size_version(fop, &lock->loc, version, delta, _gf_false,
- NULL);
+ GF_ATOMIC_INC(ec->stats.stripe_cache.invals);
}
}
-void ec_lock_reuse(ec_fop_data_t *fop)
+static void
+ec_update_cached_stripes(ec_fop_data_t *fop)
{
- ec_fop_data_t * wait_fop;
- ec_lock_t * lock;
- ec_lock_link_t * link;
- int32_t i;
-
- for (i = 0; i < fop->lock_count; i++)
- {
- wait_fop = NULL;
-
- lock = fop->locks[i].lock;
-
- LOCK(&lock->loc.inode->lock);
+ uint64_t first;
+ uint64_t last;
+ ec_stripe_t *stripe = NULL;
+ ec_inode_t *ctx = NULL;
+ ec_stripe_list_t *stripe_cache = NULL;
+ inode_t *inode = NULL;
+ struct list_head *temp;
+ struct list_head sentinel;
+
+ first = fop->frag_range.first;
+ /* 'last' represents the first stripe not touched by the operation */
+ last = fop->frag_range.last;
+
+ /* If there are no modified stripes, we don't need to do anything
+ * else. */
+ if (last <= first) {
+ return;
+ }
- ec_trace("LOCK_DONE", fop, "lock=%p", lock);
+ if (!fop->use_fd) {
+ inode = fop->loc[0].inode;
+ } else {
+ inode = fop->fd->inode;
+ }
- GF_ASSERT(lock->owner == fop);
- lock->owner = NULL;
+ LOCK(&inode->lock);
- if (((fop->locks_update >> i) & 1) != 0) {
- if (fop->error == 0)
- {
- lock->version_delta++;
- lock->size_delta += fop->post_size - fop->pre_size;
- if (fop->have_size) {
- lock->size = fop->post_size;
- lock->have_size = 1;
- }
- }
+ ctx = __ec_inode_get(inode, fop->xl);
+ if (ctx == NULL) {
+ goto out;
+ }
+ stripe_cache = &ctx->stripe_cache;
+
+ /* Since we'll be moving elements of the list to the tail, we might
+ * end in an infinite loop. To avoid it, we insert a sentinel element
+ * into the list, so that it will be used to detect when we have
+ * traversed all existing elements once. */
+ list_add_tail(&sentinel, &stripe_cache->lru);
+ temp = stripe_cache->lru.next;
+ while (temp != &sentinel) {
+ stripe = list_entry(temp, ec_stripe_t, lru);
+ temp = temp->next;
+ if ((first <= stripe->frag_offset) && (stripe->frag_offset < last)) {
+ ec_update_stripe(fop->xl->private, stripe_cache, stripe, fop);
}
+ }
+ list_del(&sentinel);
- lock->good_mask &= fop->mask;
-
- if (!list_empty(&lock->waiting))
- {
- link = list_entry(lock->waiting.next, ec_lock_link_t, wait_list);
- list_del_init(&link->wait_list);
-
- wait_fop = link->fop;
+out:
+ UNLOCK(&inode->lock);
+}
- if (lock->kind == EC_LOCK_INODE)
- {
- wait_fop->pre_size = wait_fop->post_size = fop->post_size;
- wait_fop->have_size = fop->have_size;
+void
+ec_lock_reuse(ec_fop_data_t *fop)
+{
+ ec_cbk_data_t *cbk;
+ ec_t *ec = NULL;
+ int32_t i, count;
+ gf_boolean_t release = _gf_false;
+ ec = fop->xl->private;
+ cbk = fop->answer;
+
+ if (ec_eager_lock_used(ec, fop) && cbk != NULL) {
+ if (cbk->xdata != NULL) {
+ if ((dict_get_int32(cbk->xdata, GLUSTERFS_INODELK_COUNT, &count) ==
+ 0) &&
+ (count > 1)) {
+ release = _gf_true;
+ }
+ if (release) {
+ gf_msg_debug(fop->xl->name, 0, "Lock contention detected");
}
- wait_fop->mask &= fop->mask;
}
+ } else {
+ /* If eager lock is disabled or if we haven't get
+ * an answer with enough quorum, we always release
+ * the lock. */
+ release = _gf_true;
+ }
+ ec_update_cached_stripes(fop);
- UNLOCK(&lock->loc.inode->lock);
-
- if (wait_fop != NULL)
- {
- ec_lock(wait_fop);
-
- ec_resume(wait_fop, 0);
- }
+ for (i = 0; i < fop->lock_count; i++) {
+ ec_lock_next_owner(&fop->locks[i], cbk, release);
}
}
-void __ec_manager(ec_fop_data_t * fop, int32_t error)
+void
+__ec_manager(ec_fop_data_t *fop, int32_t error)
{
ec_t *ec = fop->xl->private;
do {
ec_trace("MANAGER", fop, "error=%d", error);
- if (ec->xl_up_count < ec->fragments) {
- error = ENOTCONN;
+ if (!ec_must_wind(fop)) {
+ if (ec->xl_up_count < ec->fragments) {
+ error = ENOTCONN;
+ }
}
+
if (error != 0) {
fop->error = error;
fop->state = -fop->state;
@@ -1667,22 +2999,44 @@ void __ec_manager(ec_fop_data_t * fop, int32_t error)
break;
}
+ /* At each state, fop must not be used anywhere else and there
+ * shouldn't be any pending subfop going on. */
+ GF_ASSERT(fop->jobs == 0);
+
+ /* While the manager is running we need to avoid that subfops launched
+ * from it could finish and call ec_resume() before the fop->handler
+ * has completed. This could lead to the same manager being executed
+ * by two threads concurrently. ec_check_complete() will take care of
+ * this reference. */
+ fop->jobs = 1;
+
fop->state = fop->handler(fop, fop->state);
+ GF_ASSERT(fop->state >= 0);
error = ec_check_complete(fop, __ec_manager);
} while (error >= 0);
}
-void ec_manager(ec_fop_data_t * fop, int32_t error)
+void
+ec_manager(ec_fop_data_t *fop, int32_t error)
{
GF_ASSERT(fop->jobs == 0);
GF_ASSERT(fop->winds == 0);
GF_ASSERT(fop->error == 0);
- if (fop->state == EC_STATE_START)
- {
+ if (fop->state == EC_STATE_START) {
fop->state = EC_STATE_INIT;
}
__ec_manager(fop, error);
}
+
+gf_boolean_t
+__ec_is_last_fop(ec_t *ec)
+{
+ if ((list_empty(&ec->pending_fops)) &&
+ (GF_ATOMIC_GET(ec->async_fop_count) == 0)) {
+ return _gf_true;
+ }
+ return _gf_false;
+}
diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h
index aaae16e71c3..51493612ac6 100644
--- a/xlators/cluster/ec/src/ec-common.h
+++ b/xlators/cluster/ec/src/ec-common.h
@@ -11,93 +11,224 @@
#ifndef __EC_COMMON_H__
#define __EC_COMMON_H__
-#include "xlator.h"
-
+#include "glusterfs/compat-errno.h" // for ENODATA on BSD
#include "ec-data.h"
+typedef enum { EC_DATA_TXN, EC_METADATA_TXN } ec_txn_t;
+
+#define EC_FOP_HEAL -1
+#define EC_FOP_FHEAL -2
+
#define EC_CONFIG_VERSION 0
#define EC_CONFIG_ALGORITHM 0
-#define EC_FLAG_UPDATE_LOC_PARENT 0x0001
-#define EC_FLAG_UPDATE_LOC_INODE 0x0002
-#define EC_FLAG_UPDATE_FD 0x0004
-#define EC_FLAG_UPDATE_FD_INODE 0x0008
-
-#define EC_FLAG_WAITING_WINDS 0x0010
-
-#define EC_MINIMUM_ONE -1
-#define EC_MINIMUM_MIN -2
-#define EC_MINIMUM_ALL -3
-
-#define EC_LOCK_ENTRY 0
-#define EC_LOCK_INODE 1
-
-#define EC_STATE_START 0
-#define EC_STATE_END 0
-#define EC_STATE_INIT 1
-#define EC_STATE_LOCK 2
-#define EC_STATE_GET_SIZE_AND_VERSION 3
-#define EC_STATE_DISPATCH 4
-#define EC_STATE_PREPARE_ANSWER 5
-#define EC_STATE_REPORT 6
-#define EC_STATE_LOCK_REUSE 7
-#define EC_STATE_UNLOCK 8
-
-#define EC_STATE_DELAYED_START 100
-
-#define EC_STATE_HEAL_ENTRY_LOOKUP 200
-#define EC_STATE_HEAL_ENTRY_PREPARE 201
-#define EC_STATE_HEAL_PRE_INODELK_LOCK 202
-#define EC_STATE_HEAL_PRE_INODE_LOOKUP 203
-#define EC_STATE_HEAL_XATTRIBUTES_REMOVE 204
-#define EC_STATE_HEAL_XATTRIBUTES_SET 205
-#define EC_STATE_HEAL_ATTRIBUTES 206
-#define EC_STATE_HEAL_OPEN 207
-#define EC_STATE_HEAL_REOPEN_FD 208
-#define EC_STATE_HEAL_UNLOCK 209
-#define EC_STATE_HEAL_UNLOCK_ENTRY 210
-#define EC_STATE_HEAL_DATA_LOCK 211
-#define EC_STATE_HEAL_DATA_COPY 212
-#define EC_STATE_HEAL_DATA_UNLOCK 213
-#define EC_STATE_HEAL_POST_INODELK_LOCK 214
-#define EC_STATE_HEAL_POST_INODE_LOOKUP 215
-#define EC_STATE_HEAL_SETATTR 216
-#define EC_STATE_HEAL_POST_INODELK_UNLOCK 217
-#define EC_STATE_HEAL_DISPATCH 218
-
-int32_t ec_dispatch_one_retry(ec_fop_data_t * fop, int32_t idx, int32_t op_ret,
- int32_t op_errno);
-int32_t ec_dispatch_next(ec_fop_data_t * fop, int32_t idx);
-
-void ec_complete(ec_fop_data_t * fop);
-
-void ec_update_bad(ec_fop_data_t * fop, uintptr_t good);
-
-void ec_fop_set_error(ec_fop_data_t * fop, int32_t error);
-
-void ec_lock_prepare_inode(ec_fop_data_t *fop, loc_t *loc, int32_t update);
-void ec_lock_prepare_entry(ec_fop_data_t *fop, loc_t *loc, int32_t update);
-void ec_lock_prepare_fd(ec_fop_data_t *fop, fd_t *fd, int32_t update);
-void ec_lock(ec_fop_data_t * fop);
-void ec_lock_reuse(ec_fop_data_t *fop);
-void ec_unlock(ec_fop_data_t * fop);
-
-void ec_get_size_version(ec_fop_data_t * fop);
-void ec_prepare_update(ec_fop_data_t *fop);
-void ec_flush_size_version(ec_fop_data_t * fop);
-
-void ec_dispatch_all(ec_fop_data_t * fop);
-void ec_dispatch_inc(ec_fop_data_t * fop);
-void ec_dispatch_min(ec_fop_data_t * fop);
-void ec_dispatch_one(ec_fop_data_t * fop);
-
-void ec_wait_winds(ec_fop_data_t * fop);
-
-void ec_sleep(ec_fop_data_t *fop);
-void ec_resume(ec_fop_data_t * fop, int32_t error);
-void ec_resume_parent(ec_fop_data_t * fop, int32_t error);
-
-void ec_manager(ec_fop_data_t * fop, int32_t error);
-
+#define EC_FLAG_LOCK_SHARED 0x0001
+
+#define QUORUM_CBK(fn, fop, frame, cookie, this, op_ret, op_errno, params...) \
+ do { \
+ ec_t *__ec = fop->xl->private; \
+ int32_t __op_ret = 0; \
+ int32_t __op_errno = 0; \
+ int32_t __success_count = gf_bits_count(fop->good); \
+ \
+ __op_ret = op_ret; \
+ __op_errno = op_errno; \
+ if (!fop->parent && frame && \
+ (GF_CLIENT_PID_SELF_HEALD != frame->root->pid) && \
+ __ec->quorum_count && (__success_count < __ec->quorum_count) && \
+ op_ret >= 0) { \
+ __op_ret = -1; \
+ __op_errno = EIO; \
+ gf_msg(__ec->xl->name, GF_LOG_ERROR, 0, \
+ EC_MSG_CHILDS_INSUFFICIENT, \
+ "Insufficient available children for this request " \
+ "(have %d, need %d). %s", \
+ __success_count, __ec->quorum_count, ec_msg_str(fop)); \
+ } \
+ fn(frame, cookie, this, __op_ret, __op_errno, params); \
+ } while (0)
+
+enum _ec_xattrop_flags {
+ EC_FLAG_XATTROP,
+ EC_FLAG_DATA_DIRTY,
+ EC_FLAG_METADATA_DIRTY,
+
+ /* Add any new flag here, before EC_FLAG_MAX. The maximum number of
+ * flags that can be defined is 16. */
+
+ EC_FLAG_MAX
+};
+
+/* We keep two sets of flags. One to determine what's really providing the
+ * current xattrop and the other to know what the parent fop of the xattrop
+ * needs to proceed. It might happen that a fop needs some information that
+ * is being already requested by a previous fop. The two sets are stored
+ * contiguously. */
+
+#define EC_FLAG_NEEDS(_flag) (1 << (_flag))
+#define EC_FLAG_PROVIDES(_flag) (1 << ((_flag) + EC_FLAG_MAX))
+
+#define EC_NEEDED_FLAGS(_flags) ((_flags) & ((1 << EC_FLAG_MAX) - 1))
+
+#define EC_PROVIDED_FLAGS(_flags) EC_NEEDED_FLAGS((_flags) >> EC_FLAG_MAX)
+
+#define EC_FLAGS_HAVE(_flags, _flag) (((_flags) & (1 << (_flag))) != 0)
+
+#define EC_SELFHEAL_BIT 62
+
+#define EC_MINIMUM_ONE (1 << 6)
+#define EC_MINIMUM_MIN (2 << 6)
+#define EC_MINIMUM_ALL (3 << 6)
+#define EC_FOP_NO_PROPAGATE_ERROR (1 << 8)
+#define EC_FOP_MINIMUM(_flags) ((_flags)&255)
+#define EC_FOP_FLAGS(_flags) ((_flags) & ~255)
+
+#define EC_UPDATE_DATA 1
+#define EC_UPDATE_META 2
+#define EC_QUERY_INFO 4
+#define EC_INODE_SIZE 8
+
+#define EC_STATE_START 0
+#define EC_STATE_END 0
+#define EC_STATE_INIT 1
+#define EC_STATE_LOCK 2
+#define EC_STATE_DISPATCH 3
+#define EC_STATE_PREPARE_ANSWER 4
+#define EC_STATE_REPORT 5
+#define EC_STATE_LOCK_REUSE 6
+#define EC_STATE_UNLOCK 7
+
+#define EC_STATE_DELAYED_START 100
+
+#define EC_STATE_HEAL_ENTRY_LOOKUP 200
+#define EC_STATE_HEAL_ENTRY_PREPARE 201
+#define EC_STATE_HEAL_PRE_INODELK_LOCK 202
+#define EC_STATE_HEAL_PRE_INODE_LOOKUP 203
+#define EC_STATE_HEAL_XATTRIBUTES_REMOVE 204
+#define EC_STATE_HEAL_XATTRIBUTES_SET 205
+#define EC_STATE_HEAL_ATTRIBUTES 206
+#define EC_STATE_HEAL_OPEN 207
+#define EC_STATE_HEAL_REOPEN_FD 208
+#define EC_STATE_HEAL_UNLOCK 209
+#define EC_STATE_HEAL_UNLOCK_ENTRY 210
+#define EC_STATE_HEAL_DATA_LOCK 211
+#define EC_STATE_HEAL_DATA_COPY 212
+#define EC_STATE_HEAL_DATA_UNLOCK 213
+#define EC_STATE_HEAL_POST_INODELK_LOCK 214
+#define EC_STATE_HEAL_POST_INODE_LOOKUP 215
+#define EC_STATE_HEAL_SETATTR 216
+#define EC_STATE_HEAL_POST_INODELK_UNLOCK 217
+#define EC_STATE_HEAL_DISPATCH 218
+
+/* Value to cover the full range of a file */
+#define EC_RANGE_FULL ((uint64_t)LLONG_MAX + 1)
+
+gf_boolean_t
+ec_dispatch_one_retry(ec_fop_data_t *fop, ec_cbk_data_t **cbk);
+void
+ec_dispatch_next(ec_fop_data_t *fop, uint32_t idx);
+
+void
+ec_complete(ec_fop_data_t *fop);
+
+void
+ec_update_good(ec_fop_data_t *fop, uintptr_t good);
+
+void
+ec_fop_set_error(ec_fop_data_t *fop, int32_t error);
+
+void
+__ec_fop_set_error(ec_fop_data_t *fop, int32_t error);
+
+ec_cbk_data_t *
+ec_fop_prepare_answer(ec_fop_data_t *fop, gf_boolean_t ro);
+
+gf_boolean_t
+ec_cbk_set_error(ec_cbk_data_t *cbk, int32_t error, gf_boolean_t ro);
+
+void
+ec_lock_prepare_inode(ec_fop_data_t *fop, loc_t *loc, uint32_t flags,
+ off_t fl_start, uint64_t fl_size);
+void
+ec_lock_prepare_parent_inode(ec_fop_data_t *fop, loc_t *loc, loc_t *base,
+ uint32_t flags);
+void
+ec_lock_prepare_fd(ec_fop_data_t *fop, fd_t *fd, uint32_t flags, off_t fl_start,
+ uint64_t fl_size);
+void
+ec_lock(ec_fop_data_t *fop);
+void
+ec_lock_reuse(ec_fop_data_t *fop);
+void
+ec_unlock(ec_fop_data_t *fop);
+void
+ec_lock_release(ec_t *ec, inode_t *inode);
+
+gf_boolean_t
+ec_get_inode_size(ec_fop_data_t *fop, inode_t *inode, uint64_t *size);
+gf_boolean_t
+__ec_get_inode_size(ec_fop_data_t *fop, inode_t *inode, uint64_t *size);
+gf_boolean_t
+ec_set_inode_size(ec_fop_data_t *fop, inode_t *inode, uint64_t size);
+gf_boolean_t
+__ec_set_inode_size(ec_fop_data_t *fop, inode_t *inode, uint64_t size);
+void
+ec_clear_inode_info(ec_fop_data_t *fop, inode_t *inode);
+
+void
+ec_flush_size_version(ec_fop_data_t *fop);
+
+void
+ec_dispatch_all(ec_fop_data_t *fop);
+void
+ec_dispatch_inc(ec_fop_data_t *fop);
+void
+ec_dispatch_min(ec_fop_data_t *fop);
+void
+ec_dispatch_one(ec_fop_data_t *fop);
+
+void
+ec_succeed_all(ec_fop_data_t *fop);
+
+void
+ec_sleep(ec_fop_data_t *fop);
+void
+ec_resume(ec_fop_data_t *fop, int32_t error);
+void
+ec_resume_parent(ec_fop_data_t *fop);
+
+void
+ec_manager(ec_fop_data_t *fop, int32_t error);
+gf_boolean_t
+ec_is_recoverable_error(int32_t op_errno);
+void
+ec_handle_healers_done(ec_fop_data_t *fop);
+
+int32_t
+ec_heal_inspect(call_frame_t *frame, ec_t *ec, inode_t *inode,
+ unsigned char *locked_on, gf_boolean_t self_locked,
+ gf_boolean_t thorough, ec_heal_need_t *need_heal);
+int32_t
+ec_get_heal_info(xlator_t *this, loc_t *loc, dict_t **dict);
+
+int32_t
+ec_lock_unlocked(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+void
+ec_update_fd_status(fd_t *fd, xlator_t *xl, int child_index,
+ int32_t ret_status);
+gf_boolean_t
+ec_is_entry_healing(ec_fop_data_t *fop);
+void
+ec_set_entry_healing(ec_fop_data_t *fop);
+void
+ec_reset_entry_healing(ec_fop_data_t *fop);
+char *
+ec_msg_str(ec_fop_data_t *fop);
+gf_boolean_t
+__ec_is_last_fop(ec_t *ec);
+void
+ec_lock_update_good(ec_lock_t *lock, ec_fop_data_t *fop);
#endif /* __EC_COMMON_H__ */
diff --git a/xlators/cluster/ec/src/ec-data.c b/xlators/cluster/ec/src/ec-data.c
index fb47aea90a8..06388833546 100644
--- a/xlators/cluster/ec/src/ec-data.c
+++ b/xlators/cluster/ec/src/ec-data.c
@@ -8,49 +8,49 @@
cases as published by the Free Software Foundation.
*/
-#include "ec-mem-types.h"
#include "ec-helpers.h"
#include "ec-common.h"
#include "ec-data.h"
+#include "ec-messages.h"
-ec_cbk_data_t * ec_cbk_data_allocate(call_frame_t * frame, xlator_t * this,
- ec_fop_data_t * fop, int32_t id,
- int32_t idx, int32_t op_ret,
- int32_t op_errno)
+ec_cbk_data_t *
+ec_cbk_data_allocate(call_frame_t *frame, xlator_t *this, ec_fop_data_t *fop,
+ int32_t id, int32_t idx, int32_t op_ret, int32_t op_errno)
{
- ec_cbk_data_t * cbk;
- ec_t * ec = this->private;
+ ec_cbk_data_t *cbk;
+ ec_t *ec = this->private;
- if (fop->xl != this)
- {
- gf_log(this->name, GF_LOG_ERROR, "Mismatching xlators between request "
- "and answer (req=%s, ans=%s).",
- fop->xl->name, this->name);
+ if (fop->xl != this) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, EC_MSG_XLATOR_MISMATCH,
+ "Mismatching xlators between request "
+ "and answer (req=%s, ans=%s).",
+ fop->xl->name, this->name);
return NULL;
}
- if (fop->frame != frame)
- {
- gf_log(this->name, GF_LOG_ERROR, "Mismatching frames between request "
- "and answer (req=%p, ans=%p).",
- fop->frame, frame);
+ if (fop->frame != frame) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, EC_MSG_FRAME_MISMATCH,
+ "Mismatching frames between request "
+ "and answer (req=%p, ans=%p).",
+ fop->frame, frame);
return NULL;
}
- if (fop->id != id)
- {
- gf_log(this->name, GF_LOG_ERROR, "Mismatching fops between request "
- "and answer (req=%d, ans=%d).",
- fop->id, id);
+ if (fop->id != id) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, EC_MSG_FOP_MISMATCH,
+ "Mismatching fops between request "
+ "and answer (req=%d, ans=%d).",
+ fop->id, id);
return NULL;
}
cbk = mem_get0(ec->cbk_pool);
- if (cbk == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to allocate memory for an "
- "answer.");
+ if (cbk == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY,
+ "Failed to allocate memory for an "
+ "answer.");
+ return NULL;
}
cbk->fop = fop;
@@ -59,6 +59,7 @@ ec_cbk_data_t * ec_cbk_data_allocate(call_frame_t * frame, xlator_t * this,
cbk->count = 1;
cbk->op_ret = op_ret;
cbk->op_errno = op_errno;
+ INIT_LIST_HEAD(&cbk->entries.list);
LOCK(&fop->lock);
@@ -69,51 +70,58 @@ ec_cbk_data_t * ec_cbk_data_allocate(call_frame_t * frame, xlator_t * this,
return cbk;
}
-void ec_cbk_data_destroy(ec_cbk_data_t * cbk)
+void
+ec_cbk_data_destroy(ec_cbk_data_t *cbk)
{
- if (cbk->xdata != NULL)
- {
+ if (cbk->xdata != NULL) {
dict_unref(cbk->xdata);
}
- if (cbk->dict != NULL)
- {
+ if (cbk->dict != NULL) {
dict_unref(cbk->dict);
}
- if (cbk->inode != NULL)
- {
+ if (cbk->inode != NULL) {
inode_unref(cbk->inode);
}
- if (cbk->fd != NULL)
- {
+ if (cbk->fd != NULL) {
fd_unref(cbk->fd);
}
- if (cbk->buffers != NULL)
- {
+ if (cbk->buffers != NULL) {
iobref_unref(cbk->buffers);
}
GF_FREE(cbk->vector);
+ gf_dirent_free(&cbk->entries);
+ GF_FREE(cbk->str);
mem_put(cbk);
}
-ec_fop_data_t * ec_fop_data_allocate(call_frame_t * frame, xlator_t * this,
- int32_t id, uint32_t flags,
- uintptr_t target, int32_t minimum,
- ec_wind_f wind, ec_handler_f handler,
- ec_cbk_t cbks, void * data)
+ec_fop_data_t *
+ec_fop_data_allocate(call_frame_t *frame, xlator_t *this, int32_t id,
+ uint32_t flags, uintptr_t target, uint32_t fop_flags,
+ ec_wind_f wind, ec_handler_f handler, ec_cbk_t cbks,
+ void *data)
{
- ec_fop_data_t * fop, * parent;
- ec_t * ec = this->private;
+ ec_fop_data_t *fop, *parent;
+ ec_t *ec = this->private;
fop = mem_get0(ec->fop_pool);
- if (fop == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to allocate memory for a "
- "request.");
+ if (fop == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY,
+ "Failed to allocate memory for a "
+ "request.");
return NULL;
}
+ INIT_LIST_HEAD(&fop->cbk_list);
+ INIT_LIST_HEAD(&fop->healer);
+ INIT_LIST_HEAD(&fop->answer_list);
+ INIT_LIST_HEAD(&fop->pending_list);
+ INIT_LIST_HEAD(&fop->locks[0].owner_list);
+ INIT_LIST_HEAD(&fop->locks[0].wait_list);
+ INIT_LIST_HEAD(&fop->locks[1].owner_list);
+ INIT_LIST_HEAD(&fop->locks[1].wait_list);
+
fop->xl = this;
fop->req_frame = frame;
@@ -124,18 +132,15 @@ ec_fop_data_t * ec_fop_data_allocate(call_frame_t * frame, xlator_t * this,
* TODO: minimize usage of private frames. Reuse req_frame as much as
* possible.
*/
- if (frame != NULL)
- {
+ if (frame != NULL) {
fop->frame = copy_frame(frame);
- }
- else
- {
+ } else {
fop->frame = create_frame(this, this->ctx->pool);
}
- if (fop->frame == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to create a private frame "
- "for a request");
+ if (fop->frame == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY,
+ "Failed to create a private frame "
+ "for a request");
mem_put(fop);
@@ -145,41 +150,42 @@ ec_fop_data_t * ec_fop_data_allocate(call_frame_t * frame, xlator_t * this,
fop->refs = 1;
fop->flags = flags;
- fop->minimum = minimum;
+ fop->minimum = EC_FOP_MINIMUM(fop_flags);
+ fop->fop_flags = EC_FOP_FLAGS(fop_flags);
fop->mask = target;
- INIT_LIST_HEAD(&fop->cbk_list);
- INIT_LIST_HEAD(&fop->answer_list);
-
fop->wind = wind;
fop->handler = handler;
fop->cbks = cbks;
fop->data = data;
+ fop->uid = fop->frame->root->uid;
+ fop->gid = fop->frame->root->gid;
+
LOCK_INIT(&fop->lock);
fop->frame->local = fop;
- if (frame != NULL)
- {
+ if (frame != NULL) {
parent = frame->local;
- if (parent != NULL)
- {
- LOCK(&parent->lock);
-
- parent->jobs++;
- parent->refs++;
-
- UNLOCK(&parent->lock);
+ if (parent != NULL) {
+ ec_sleep(parent);
}
fop->parent = parent;
}
+ LOCK(&ec->lock);
+
+ list_add_tail(&fop->pending_list, &ec->pending_fops);
+
+ UNLOCK(&ec->lock);
+
return fop;
}
-void ec_fop_data_acquire(ec_fop_data_t * fop)
+void
+ec_fop_data_acquire(ec_fop_data_t *fop)
{
LOCK(&fop->lock);
@@ -190,44 +196,74 @@ void ec_fop_data_acquire(ec_fop_data_t * fop)
UNLOCK(&fop->lock);
}
-void ec_fop_data_release(ec_fop_data_t * fop)
+static void
+ec_handle_last_pending_fop_completion(ec_fop_data_t *fop, gf_boolean_t *notify)
+{
+ ec_t *ec = fop->xl->private;
+
+ *notify = _gf_false;
+
+ if (!list_empty(&fop->pending_list)) {
+ LOCK(&ec->lock);
+ {
+ list_del_init(&fop->pending_list);
+ *notify = __ec_is_last_fop(ec);
+ }
+ UNLOCK(&ec->lock);
+ }
+}
+
+void
+ec_fop_cleanup(ec_fop_data_t *fop)
+{
+ ec_cbk_data_t *cbk, *tmp;
+
+ list_for_each_entry_safe(cbk, tmp, &fop->answer_list, answer_list)
+ {
+ list_del_init(&cbk->answer_list);
+
+ ec_cbk_data_destroy(cbk);
+ }
+ INIT_LIST_HEAD(&fop->cbk_list);
+
+ fop->answer = NULL;
+}
+
+void
+ec_fop_data_release(ec_fop_data_t *fop)
{
- ec_cbk_data_t * cbk, * tmp;
+ ec_t *ec = NULL;
int32_t refs;
+ gf_boolean_t notify = _gf_false;
LOCK(&fop->lock);
ec_trace("RELEASE", fop, "");
+ GF_ASSERT(fop->refs > 0);
refs = --fop->refs;
UNLOCK(&fop->lock);
- if (refs == 0)
- {
+ if (refs == 0) {
fop->frame->local = NULL;
STACK_DESTROY(fop->frame->root);
LOCK_DESTROY(&fop->lock);
- if (fop->xdata != NULL)
- {
+ if (fop->xdata != NULL) {
dict_unref(fop->xdata);
}
- if (fop->dict != NULL)
- {
+ if (fop->dict != NULL) {
dict_unref(fop->dict);
}
- if (fop->inode != NULL)
- {
+ if (fop->inode != NULL) {
inode_unref(fop->inode);
}
- if (fop->fd != NULL)
- {
+ if (fop->fd != NULL) {
fd_unref(fop->fd);
}
- if (fop->buffers != NULL)
- {
+ if (fop->buffers != NULL) {
iobref_unref(fop->buffers);
}
GF_FREE(fop->vector);
@@ -235,16 +271,18 @@ void ec_fop_data_release(ec_fop_data_t * fop)
GF_FREE(fop->str[1]);
loc_wipe(&fop->loc[0]);
loc_wipe(&fop->loc[1]);
+ GF_FREE(fop->errstr);
- ec_resume_parent(fop, fop->error);
+ ec_resume_parent(fop);
- list_for_each_entry_safe(cbk, tmp, &fop->answer_list, answer_list)
- {
- list_del_init(&cbk->answer_list);
-
- ec_cbk_data_destroy(cbk);
- }
+ ec_fop_cleanup(fop);
+ ec = fop->xl->private;
+ ec_handle_last_pending_fop_completion(fop, &notify);
+ ec_handle_healers_done(fop);
mem_put(fop);
+ if (notify) {
+ ec_pending_fops_completed(ec);
+ }
}
}
diff --git a/xlators/cluster/ec/src/ec-data.h b/xlators/cluster/ec/src/ec-data.h
index 80936aeaada..c8a74ffe1ed 100644
--- a/xlators/cluster/ec/src/ec-data.h
+++ b/xlators/cluster/ec/src/ec-data.h
@@ -11,291 +11,25 @@
#ifndef __EC_DATA_H__
#define __EC_DATA_H__
-#include "xlator.h"
-
-#include "ec.h"
-
-struct _ec_config;
-typedef struct _ec_config ec_config_t;
-
-struct _ec_fd;
-typedef struct _ec_fd ec_fd_t;
-
-struct _ec_inode;
-typedef struct _ec_inode ec_inode_t;
-
-union _ec_cbk;
-typedef union _ec_cbk ec_cbk_t;
-
-struct _ec_lock;
-typedef struct _ec_lock ec_lock_t;
-
-struct _ec_lock_link;
-typedef struct _ec_lock_link ec_lock_link_t;
-
-struct _ec_fop_data;
-typedef struct _ec_fop_data ec_fop_data_t;
-
-struct _ec_cbk_data;
-typedef struct _ec_cbk_data ec_cbk_data_t;
-
-struct _ec_heal;
-typedef struct _ec_heal ec_heal_t;
-
-typedef void (* ec_wind_f)(ec_t *, ec_fop_data_t *, int32_t);
-typedef int32_t (* ec_handler_f)(ec_fop_data_t *, int32_t);
-typedef void (* ec_resume_f)(ec_fop_data_t *, int32_t);
-
-struct _ec_config
-{
- uint32_t version;
- uint8_t algorithm;
- uint8_t gf_word_size;
- uint8_t bricks;
- uint8_t redundancy;
- uint32_t chunk_size;
-};
-
-struct _ec_fd
-{
- uintptr_t bad;
- loc_t loc;
- uintptr_t open;
- int32_t flags;
-};
-
-struct _ec_inode
-{
- uintptr_t bad;
- ec_lock_t *entry_lock;
- ec_lock_t *inode_lock;
- struct list_head heal;
-
-};
-
-typedef int32_t (* fop_heal_cbk_t)(call_frame_t *, void * cookie, xlator_t *,
- int32_t, int32_t, uintptr_t, uintptr_t,
- uintptr_t, dict_t *);
-typedef int32_t (* fop_fheal_cbk_t)(call_frame_t *, void * cookie, xlator_t *,
- int32_t, int32_t, uintptr_t, uintptr_t,
- uintptr_t, dict_t *);
-
-
-union _ec_cbk
-{
- fop_access_cbk_t access;
- fop_create_cbk_t create;
- fop_discard_cbk_t discard;
- fop_entrylk_cbk_t entrylk;
- fop_fentrylk_cbk_t fentrylk;
- fop_fallocate_cbk_t fallocate;
- fop_flush_cbk_t flush;
- fop_fsync_cbk_t fsync;
- fop_fsyncdir_cbk_t fsyncdir;
- fop_getxattr_cbk_t getxattr;
- fop_fgetxattr_cbk_t fgetxattr;
- fop_heal_cbk_t heal;
- fop_fheal_cbk_t fheal;
- fop_inodelk_cbk_t inodelk;
- fop_finodelk_cbk_t finodelk;
- fop_link_cbk_t link;
- fop_lk_cbk_t lk;
- fop_lookup_cbk_t lookup;
- fop_mkdir_cbk_t mkdir;
- fop_mknod_cbk_t mknod;
- fop_open_cbk_t open;
- fop_opendir_cbk_t opendir;
- fop_readdir_cbk_t readdir;
- fop_readdirp_cbk_t readdirp;
- fop_readlink_cbk_t readlink;
- fop_readv_cbk_t readv;
- fop_removexattr_cbk_t removexattr;
- fop_fremovexattr_cbk_t fremovexattr;
- fop_rename_cbk_t rename;
- fop_rmdir_cbk_t rmdir;
- fop_setattr_cbk_t setattr;
- fop_fsetattr_cbk_t fsetattr;
- fop_setxattr_cbk_t setxattr;
- fop_fsetxattr_cbk_t fsetxattr;
- fop_stat_cbk_t stat;
- fop_fstat_cbk_t fstat;
- fop_statfs_cbk_t statfs;
- fop_symlink_cbk_t symlink;
- fop_truncate_cbk_t truncate;
- fop_ftruncate_cbk_t ftruncate;
- fop_unlink_cbk_t unlink;
- fop_writev_cbk_t writev;
- fop_xattrop_cbk_t xattrop;
- fop_fxattrop_cbk_t fxattrop;
- fop_zerofill_cbk_t zerofill;
-};
-
-struct _ec_lock
-{
- ec_lock_t **plock;
- gf_timer_t *timer;
- struct list_head waiting;
- uintptr_t mask;
- uintptr_t good_mask;
- int32_t kind;
- int32_t refs;
- int32_t acquired;
- int32_t have_size;
- uint64_t size;
- uint64_t size_delta;
- uint64_t version;
- uint64_t version_delta;
- ec_fop_data_t *owner;
- loc_t loc;
- union
- {
- entrylk_type type;
- struct gf_flock flock;
- };
- gf_boolean_t is_dirty;
-};
-
-struct _ec_lock_link
-{
- ec_lock_t * lock;
- ec_fop_data_t * fop;
- struct list_head wait_list;
-};
-
-struct _ec_fop_data
-{
- int32_t id;
- int32_t refs;
- int32_t state;
- int32_t minimum;
- int32_t expected;
- int32_t winds;
- int32_t jobs;
- int32_t error;
- ec_fop_data_t * parent;
- xlator_t * xl;
- call_frame_t * req_frame; // frame of the calling xlator
- call_frame_t * frame; // frame used by this fop
- struct list_head cbk_list; // sorted list of groups of answers
- struct list_head answer_list; // list of answers
- ec_cbk_data_t * answer; // accepted answer
- int32_t lock_count;
- int32_t locked;
- ec_lock_link_t locks[2];
- int32_t locks_update;
- int32_t have_size;
- uint64_t pre_size;
- uint64_t post_size;
- gf_lock_t lock;
- ec_config_t config;
-
- uint32_t flags;
- uint32_t first;
- uintptr_t mask;
- uintptr_t healing; /*Dispatch is done but call is successful only
- if fop->minimum number of subvolumes succeed
- which are not healing*/
- uintptr_t remaining;
- uintptr_t good;
- uintptr_t bad;
-
- ec_wind_f wind;
- ec_handler_f handler;
- ec_resume_f resume;
- ec_cbk_t cbks;
- void * data;
- ec_heal_t *heal;
-
- uint64_t user_size;
- uint32_t head;
-
- int32_t use_fd;
-
- dict_t * xdata;
- dict_t * dict;
- int32_t int32;
- uint32_t uint32;
- uint64_t size;
- off_t offset;
- mode_t mode[2];
- entrylk_cmd entrylk_cmd;
- entrylk_type entrylk_type;
- gf_xattrop_flags_t xattrop_flags;
- dev_t dev;
- inode_t * inode;
- fd_t * fd;
- struct iatt iatt;
- char * str[2];
- loc_t loc[2];
- struct gf_flock flock;
- struct iovec * vector;
- struct iobref * buffers;
-};
-
-struct _ec_cbk_data
-{
- struct list_head list; // item in the sorted list of groups
- struct list_head answer_list; // item in the list of answers
- ec_fop_data_t * fop;
- ec_cbk_data_t * next; // next answer in the same group
- int32_t idx;
- int32_t op_ret;
- int32_t op_errno;
- int32_t count;
- uintptr_t mask;
-
- dict_t * xdata;
- dict_t * dict;
- int32_t int32;
- uintptr_t uintptr[3];
- uint64_t size;
- uint64_t version;
- inode_t * inode;
- fd_t * fd;
- struct statvfs statvfs;
- struct iatt iatt[5];
- struct gf_flock flock;
- struct iovec * vector;
- struct iobref * buffers;
- gf_boolean_t dirty;
-};
-
-struct _ec_heal
-{
- struct list_head list;
- gf_lock_t lock;
- xlator_t *xl;
- ec_fop_data_t *fop;
- void *data;
- ec_fop_data_t *lookup;
- loc_t loc;
- struct iatt iatt;
- char *symlink;
- fd_t *fd;
- int32_t partial;
- int32_t done;
- gf_boolean_t nameheal;
- uintptr_t available;
- uintptr_t good;
- uintptr_t bad;
- uintptr_t open;
- uintptr_t fixed;
- uint64_t offset;
- uint64_t size;
- uint64_t version;
- uint64_t raw_size;
-};
-
-ec_cbk_data_t * ec_cbk_data_allocate(call_frame_t * frame, xlator_t * this,
- ec_fop_data_t * fop, int32_t id,
- int32_t idx, int32_t op_ret,
- int32_t op_errno);
-ec_fop_data_t * ec_fop_data_allocate(call_frame_t * frame, xlator_t * this,
- int32_t id, uint32_t flags,
- uintptr_t target, int32_t minimum,
- ec_wind_f wind, ec_handler_f handler,
- ec_cbk_t cbks, void * data);
-void ec_fop_data_acquire(ec_fop_data_t * fop);
-void ec_fop_data_release(ec_fop_data_t * fop);
+#include "ec-types.h"
+
+ec_cbk_data_t *
+ec_cbk_data_allocate(call_frame_t *frame, xlator_t *this, ec_fop_data_t *fop,
+ int32_t id, int32_t idx, int32_t op_ret, int32_t op_errno);
+ec_fop_data_t *
+ec_fop_data_allocate(call_frame_t *frame, xlator_t *this, int32_t id,
+ uint32_t flags, uintptr_t target, uint32_t fop_flags,
+ ec_wind_f wind, ec_handler_f handler, ec_cbk_t cbks,
+ void *data);
+void
+ec_fop_data_acquire(ec_fop_data_t *fop);
+void
+ec_fop_data_release(ec_fop_data_t *fop);
+
+void
+ec_fop_cleanup(ec_fop_data_t *fop);
+
+void
+ec_pending_fops_completed(ec_t *ec);
#endif /* __EC_DATA_H__ */
diff --git a/xlators/cluster/ec/src/ec-dir-read.c b/xlators/cluster/ec/src/ec-dir-read.c
index ffc3ed5a7cd..f71dcfac293 100644
--- a/xlators/cluster/ec/src/ec-dir-read.c
+++ b/xlators/cluster/ec/src/ec-dir-read.c
@@ -8,24 +8,26 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
-
+#include "ec.h"
+#include "ec-messages.h"
#include "ec-helpers.h"
#include "ec-common.h"
#include "ec-combine.h"
-#include "ec-method.h"
#include "ec-fops.h"
-/* FOP: opendir */
+/****************************************************************
+ *
+ * File Operation: opendir
+ *
+ ***************************************************************/
-int32_t ec_combine_opendir(ec_fop_data_t * fop, ec_cbk_data_t * dst,
- ec_cbk_data_t * src)
+int32_t
+ec_combine_opendir(ec_fop_data_t *fop, ec_cbk_data_t *dst, ec_cbk_data_t *src)
{
- if (dst->fd != src->fd)
- {
- gf_log(fop->xl->name, GF_LOG_NOTICE, "Mismatching fd in answers "
- "of 'GF_FOP_OPENDIR': %p <-> %p",
+ if (dst->fd != src->fd) {
+ gf_msg(fop->xl->name, GF_LOG_NOTICE, 0, EC_MSG_FD_MISMATCH,
+ "Mismatching fd in answers "
+ "of 'GF_FOP_OPENDIR': %p <-> %p",
dst->fd, src->fd);
return 0;
@@ -34,12 +36,12 @@ int32_t ec_combine_opendir(ec_fop_data_t * fop, ec_cbk_data_t * dst,
return 1;
}
-int32_t ec_opendir_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
- int32_t op_ret, int32_t op_errno, fd_t * fd,
- dict_t * xdata)
+int32_t
+ec_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- ec_fop_data_t * fop = NULL;
- ec_cbk_data_t * cbk = NULL;
+ ec_fop_data_t *fop = NULL;
+ ec_cbk_data_t *cbk = NULL;
int32_t idx = (int32_t)(uintptr_t)cookie;
VALIDATE_OR_GOTO(this, out);
@@ -49,52 +51,51 @@ int32_t ec_opendir_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
fop = frame->local;
- ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx,
- frame, op_ret, op_errno);
+ ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame,
+ op_ret, op_errno);
cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_OPENDIR, idx, op_ret,
op_errno);
- if (cbk != NULL)
- {
- if (op_ret >= 0)
- {
- if (fd != NULL)
- {
+ if (cbk != NULL) {
+ if (op_ret >= 0) {
+ if (fd != NULL) {
cbk->fd = fd_ref(fd);
- if (cbk->fd == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "file descriptor.");
+ if (cbk->fd == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ EC_MSG_FILE_DESC_REF_FAIL,
+ "Failed to reference a "
+ "file descriptor.");
goto out;
}
}
}
- if (xdata != NULL)
- {
+ if (xdata != NULL) {
cbk->xdata = dict_ref(xdata);
- if (cbk->xdata == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (cbk->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
}
ec_combine(cbk, ec_combine_opendir);
+
+ ec_update_fd_status(fd, this, idx, op_ret);
}
out:
- if (fop != NULL)
- {
+ if (fop != NULL) {
ec_complete(fop);
}
return 0;
}
-void ec_wind_opendir(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+void
+ec_wind_opendir(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
{
ec_trace("WIND", fop, "idx=%d", idx);
@@ -103,39 +104,45 @@ void ec_wind_opendir(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
&fop->loc[0], fop->fd, fop->xdata);
}
-int32_t ec_manager_opendir(ec_fop_data_t * fop, int32_t state)
+int32_t
+ec_manager_opendir(ec_fop_data_t *fop, int32_t state)
{
- ec_cbk_data_t * cbk;
+ ec_cbk_data_t *cbk;
ec_fd_t *ctx;
+ int32_t err;
- switch (state)
- {
+ switch (state) {
case EC_STATE_INIT:
LOCK(&fop->fd->lock);
ctx = __ec_fd_get(fop->fd, fop->xl);
- if ((ctx == NULL) ||
- (ec_loc_from_loc(fop->xl, &ctx->loc, &fop->loc[0])) != 0) {
+ if (ctx == NULL) {
UNLOCK(&fop->fd->lock);
- fop->error = EIO;
+ fop->error = ENOMEM;
return EC_STATE_REPORT;
}
+ if (!ctx->loc.inode) {
+ err = ec_loc_from_loc(fop->xl, &ctx->loc, &fop->loc[0]);
+ if (err != 0) {
+ UNLOCK(&fop->fd->lock);
+
+ fop->error = -err;
+
+ return EC_STATE_REPORT;
+ }
+ }
UNLOCK(&fop->fd->lock);
/* Fall through */
case EC_STATE_LOCK:
- ec_lock_prepare_entry(fop, &fop->loc[0], 0);
+ ec_lock_prepare_inode(fop, &fop->loc[0], EC_QUERY_INFO, 0,
+ EC_RANGE_FULL);
ec_lock(fop);
- return EC_STATE_GET_SIZE_AND_VERSION;
-
- case EC_STATE_GET_SIZE_AND_VERSION:
- ec_get_size_version(fop);
-
return EC_STATE_DISPATCH;
case EC_STATE_DISPATCH:
@@ -144,38 +151,20 @@ int32_t ec_manager_opendir(ec_fop_data_t * fop, int32_t state)
return EC_STATE_PREPARE_ANSWER;
case EC_STATE_PREPARE_ANSWER:
- cbk = fop->answer;
- if (cbk != NULL)
- {
- if (!ec_dict_combine(cbk, EC_COMBINE_XDATA))
- {
- if (cbk->op_ret >= 0)
- {
- cbk->op_ret = -1;
- cbk->op_errno = EIO;
- }
+ cbk = ec_fop_prepare_answer(fop, _gf_true);
+ if (cbk != NULL) {
+ /* Save which subvolumes successfully opened the directory.
+ * If ctx->open is 0, it means that readdir cannot be
+ * processed in this directory.
+ */
+ LOCK(&fop->fd->lock);
+
+ ctx = __ec_fd_get(fop->fd, fop->xl);
+ if (ctx != NULL) {
+ ctx->open |= cbk->mask;
}
- if (cbk->op_ret >= 0) {
- /* Save which subvolumes successfully opened the directory.
- * If ctx->open is 0, it means that readdir cannot be
- * processed in this directory.
- */
- LOCK(&fop->fd->lock);
-
- ctx = __ec_fd_get(fop->fd, fop->xl);
- if (ctx != NULL) {
- ctx->open |= cbk->mask;
- }
- UNLOCK(&fop->fd->lock);
- }
- if (cbk->op_ret < 0) {
- ec_fop_set_error(fop, cbk->op_errno);
- }
- }
- else
- {
- ec_fop_set_error(fop, EIO);
+ UNLOCK(&fop->fd->lock);
}
return EC_STATE_REPORT;
@@ -185,8 +174,7 @@ int32_t ec_manager_opendir(ec_fop_data_t * fop, int32_t state)
GF_ASSERT(cbk != NULL);
- if (fop->cbks.opendir != NULL)
- {
+ if (fop->cbks.opendir != NULL) {
fop->cbks.opendir(fop->req_frame, fop, fop->xl, cbk->op_ret,
cbk->op_errno, cbk->fd, cbk->xdata);
}
@@ -195,14 +183,12 @@ int32_t ec_manager_opendir(ec_fop_data_t * fop, int32_t state)
case -EC_STATE_INIT:
case -EC_STATE_LOCK:
- case -EC_STATE_GET_SIZE_AND_VERSION:
case -EC_STATE_DISPATCH:
case -EC_STATE_PREPARE_ANSWER:
case -EC_STATE_REPORT:
GF_ASSERT(fop->error != 0);
- if (fop->cbks.opendir != NULL)
- {
+ if (fop->cbks.opendir != NULL) {
fop->cbks.opendir(fop->req_frame, fop, fop->xl, -1, fop->error,
NULL, NULL);
}
@@ -222,62 +208,59 @@ int32_t ec_manager_opendir(ec_fop_data_t * fop, int32_t state)
return EC_STATE_END;
default:
- gf_log(fop->xl->name, GF_LOG_ERROR, "Unhandled state %d for %s",
- state, ec_fop_name(fop->id));
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
return EC_STATE_END;
}
}
-void ec_opendir(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_opendir_cbk_t func, void * data,
- loc_t * loc, fd_t * fd, dict_t * xdata)
+void
+ec_opendir(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_opendir_cbk_t func, void *data, loc_t *loc,
+ fd_t *fd, dict_t *xdata)
{
- ec_cbk_t callback = { .opendir = func };
- ec_fop_data_t * fop = NULL;
- int32_t error = EIO;
+ ec_cbk_t callback = {.opendir = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
- gf_log("ec", GF_LOG_TRACE, "EC(OPENDIR) %p", frame);
+ gf_msg_trace("ec", 0, "EC(OPENDIR) %p", frame);
VALIDATE_OR_GOTO(this, out);
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_OPENDIR, EC_FLAG_UPDATE_FD,
- target, minimum, ec_wind_opendir,
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_OPENDIR, EC_FLAG_LOCK_SHARED,
+ target, fop_flags, ec_wind_opendir,
ec_manager_opendir, callback, data);
- if (fop == NULL)
- {
+ if (fop == NULL) {
goto out;
}
- if (loc != NULL)
- {
- if (loc_copy(&fop->loc[0], loc) != 0)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to copy a location.");
+ if (loc != NULL) {
+ if (loc_copy(&fop->loc[0], loc) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL,
+ "Failed to copy a location.");
goto out;
}
}
- if (fd != NULL)
- {
+ if (fd != NULL) {
fop->fd = fd_ref(fd);
- if (fop->fd == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "file descriptor.");
+ if (fop->fd == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL,
+ "Failed to reference a "
+ "file descriptor.");
goto out;
}
}
- if (xdata != NULL)
- {
+ if (xdata != NULL) {
fop->xdata = dict_ref(xdata);
- if (fop->xdata == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
@@ -286,70 +269,72 @@ void ec_opendir(call_frame_t * frame, xlator_t * this, uintptr_t target,
error = 0;
out:
- if (fop != NULL)
- {
+ if (fop != NULL) {
ec_manager(fop, error);
- }
- else
- {
- func(frame, NULL, this, -1, EIO, NULL, NULL);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, NULL);
}
}
/* Returns -1 if client_id is invalid else index of child subvol in xl_list */
int
-ec_deitransform (xlator_t *this, off_t offset)
+ec_deitransform(xlator_t *this, off_t offset)
{
- int idx = -1;
- int client_id = -1;
- ec_t *ec = this->private;
- char id[32] = {0};
-
- client_id = gf_deitransform (this, offset);
- sprintf (id, "%d", client_id);
- if (dict_get_int32 (ec->leaf_to_subvolid, id, &idx)) {
- idx = -1;
- goto out;
- }
+ int idx = -1;
+ int client_id = -1;
+ ec_t *ec = this->private;
+ char id[32] = {0};
+ int err;
+
+ client_id = gf_deitransform(this, offset);
+ sprintf(id, "%d", client_id);
+ err = dict_get_int32(ec->leaf_to_subvolid, id, &idx);
+ if (err < 0) {
+ idx = err;
+ goto out;
+ }
out:
- if (idx < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "Invalid index %d in readdirp request", client_id);
- }
- return idx;
+ if (idx < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, EC_MSG_INVALID_REQUEST,
+ "Invalid index %d in readdirp request", client_id);
+ idx = -EINVAL;
+ }
+ return idx;
}
/* FOP: readdir */
-void ec_adjust_readdir(ec_t * ec, int32_t idx, gf_dirent_t * entries)
+void
+ec_adjust_readdirp(ec_t *ec, int32_t idx, gf_dirent_t *entries)
{
- gf_dirent_t * entry;
+ gf_dirent_t *entry;
list_for_each_entry(entry, &entries->list, list)
{
- if (entry->d_stat.ia_type == IA_IFREG)
- {
+ if (!entry->inode)
+ continue;
+
+ if (entry->d_stat.ia_type == IA_IFREG) {
if ((entry->dict == NULL) ||
(ec_dict_del_number(entry->dict, EC_XATTR_SIZE,
- &entry->d_stat.ia_size) != 0))
- {
- gf_log(ec->xl->name, GF_LOG_WARNING, "Unable to get exact "
- "file size.");
-
- entry->d_stat.ia_size *= ec->fragments;
+ &entry->d_stat.ia_size) != 0)) {
+ inode_unref(entry->inode);
+ entry->inode = NULL;
+ } else {
+ ec_iatt_rebuild(ec, &entry->d_stat, 1, 1);
}
-
- ec_iatt_rebuild(ec, &entry->d_stat, 1, 1);
}
}
}
-int32_t ec_readdir_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t * entries,
- dict_t * xdata)
+int32_t
+ec_common_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
{
- ec_fop_data_t * fop = NULL;
+ ec_fop_data_t *fop = NULL;
+ ec_cbk_data_t *cbk = NULL;
int32_t idx = (int32_t)(uintptr_t)cookie;
VALIDATE_OR_GOTO(this, out);
@@ -359,149 +344,199 @@ int32_t ec_readdir_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
fop = frame->local;
- ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx,
- frame, op_ret, op_errno);
-
- if (op_ret > 0)
- {
- ec_adjust_readdir(fop->xl->private, idx, entries);
- }
+ ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame,
+ op_ret, op_errno);
- if (!ec_dispatch_one_retry(fop, idx, op_ret, op_errno))
- {
- if (fop->cbks.readdir != NULL)
- {
- fop->cbks.readdir(fop->req_frame, fop, this, op_ret, op_errno,
- entries, xdata);
- }
+ cbk = ec_cbk_data_allocate(frame, this, fop, fop->id, idx, op_ret,
+ op_errno);
+ if (cbk) {
+ if (xdata)
+ cbk->xdata = dict_ref(xdata);
+ if (cbk->op_ret >= 0)
+ list_splice_init(&entries->list, &cbk->entries.list);
+ ec_combine(cbk, NULL);
}
out:
- if (fop != NULL)
- {
+ if (fop != NULL) {
ec_complete(fop);
}
return 0;
}
-void ec_wind_readdir(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+void
+ec_wind_readdir(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
{
ec_trace("WIND", fop, "idx=%d", idx);
- STACK_WIND_COOKIE(fop->frame, ec_readdir_cbk, (void *)(uintptr_t)idx,
+ STACK_WIND_COOKIE(fop->frame, ec_common_readdir_cbk, (void *)(uintptr_t)idx,
ec->xl_list[idx], ec->xl_list[idx]->fops->readdir,
fop->fd, fop->size, fop->offset, fop->xdata);
}
-int32_t ec_manager_readdir(ec_fop_data_t * fop, int32_t state)
+int32_t
+ec_manager_readdir(ec_fop_data_t *fop, int32_t state)
{
- ec_fd_t *ctx;
+ ec_fd_t *ctx = NULL;
+ ec_cbk_data_t *cbk = NULL;
- switch (state)
- {
+ switch (state) {
case EC_STATE_INIT:
/* Return error if opendir has not been successfully called on
* any subvolume. */
ctx = ec_fd_get(fop->fd, fop->xl);
- if ((ctx == NULL) || (ctx->open == 0)) {
- fop->error = EINVAL;
+ if (ctx == NULL) {
+ fop->error = ENOMEM;
+ } else if (ctx->open == 0) {
+ fop->error = EBADFD;
+ }
+ if (fop->error) {
+ gf_msg(fop->xl->name, GF_LOG_ERROR, fop->error,
+ EC_MSG_INVALID_REQUEST, "EC is not winding readdir: %s",
+ ec_msg_str(fop));
return EC_STATE_REPORT;
}
- if (fop->xdata == NULL)
- {
- fop->xdata = dict_new();
- if (fop->xdata == NULL)
- {
- gf_log(fop->xl->name, GF_LOG_ERROR, "Unable to prepare "
- "readdirp request");
+ if (fop->id == GF_FOP_READDIRP) {
+ int32_t err;
- fop->error = EIO;
+ if (fop->xdata == NULL) {
+ fop->xdata = dict_new();
+ if (fop->xdata == NULL) {
+ fop->error = ENOMEM;
- return EC_STATE_REPORT;
+ return EC_STATE_REPORT;
+ }
}
- }
- if (dict_set_uint64(fop->xdata, EC_XATTR_SIZE, 0) != 0)
- {
- gf_log(fop->xl->name, GF_LOG_ERROR, "Unable to prepare "
- "readdirp request");
- fop->error = EIO;
+ err = dict_set_uint64(fop->xdata, EC_XATTR_SIZE, 0);
+ if (err != 0) {
+ fop->error = -err;
- return EC_STATE_REPORT;
+ return EC_STATE_REPORT;
+ }
}
- if (fop->offset != 0)
- {
+ if (fop->offset != 0) {
+ /* Non-zero offset is irrecoverable error as the offset may not
+ * be valid on other bricks*/
int32_t idx = -1;
- idx = ec_deitransform (fop->xl, fop->offset);
+ idx = ec_deitransform(fop->xl, fop->offset);
if (idx < 0) {
- fop->error = EIO;
- return EC_STATE_REPORT;
+ fop->error = -idx;
+ return EC_STATE_REPORT;
}
fop->mask &= 1ULL << idx;
+ } else {
+ ec_lock_prepare_fd(fop, fop->fd, EC_QUERY_INFO, 0,
+ EC_RANGE_FULL);
+ ec_lock(fop);
}
- /* Fall through */
+ return EC_STATE_DISPATCH;
case EC_STATE_DISPATCH:
ec_dispatch_one(fop);
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ if (ec_dispatch_one_retry(fop, &cbk)) {
+ return EC_STATE_DISPATCH;
+ }
+
+ if ((cbk != NULL) && (cbk->op_ret > 0) &&
+ (fop->id == GF_FOP_READDIRP)) {
+ ec_adjust_readdirp(fop->xl->private, cbk->idx, &cbk->entries);
+ }
+
return EC_STATE_REPORT;
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+ GF_ASSERT(cbk);
+ if (fop->id == GF_FOP_READDIR) {
+ if (fop->cbks.readdir != NULL) {
+ fop->cbks.readdir(fop->req_frame, fop, fop->xl, cbk->op_ret,
+ cbk->op_errno, &cbk->entries, cbk->xdata);
+ }
+ } else {
+ if (fop->cbks.readdirp != NULL) {
+ fop->cbks.readdirp(fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno,
+ &cbk->entries, cbk->xdata);
+ }
+ }
+ if (fop->offset == 0)
+ return EC_STATE_LOCK_REUSE;
+ else
+ return EC_STATE_END;
+
case -EC_STATE_INIT:
+ case -EC_STATE_LOCK:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_PREPARE_ANSWER:
case -EC_STATE_REPORT:
- if (fop->id == GF_FOP_READDIR)
- {
- if (fop->cbks.readdir != NULL)
- {
+ if (fop->id == GF_FOP_READDIR) {
+ if (fop->cbks.readdir != NULL) {
fop->cbks.readdir(fop->req_frame, fop, fop->xl, -1,
fop->error, NULL, NULL);
}
- }
- else
- {
- if (fop->cbks.readdirp != NULL)
- {
+ } else {
+ if (fop->cbks.readdirp != NULL) {
fop->cbks.readdirp(fop->req_frame, fop, fop->xl, -1,
fop->error, NULL, NULL);
}
}
+ if (fop->offset == 0)
+ return EC_STATE_LOCK_REUSE;
+ else
+ return EC_STATE_END;
- case EC_STATE_REPORT:
- return EC_STATE_END;
+ case -EC_STATE_LOCK_REUSE:
+ case EC_STATE_LOCK_REUSE:
+ GF_ASSERT(fop->offset == 0);
+ ec_lock_reuse(fop);
+
+ return EC_STATE_UNLOCK;
+
+ case -EC_STATE_UNLOCK:
+ case EC_STATE_UNLOCK:
+ GF_ASSERT(fop->offset == 0);
+ ec_unlock(fop);
+ return EC_STATE_END;
default:
- gf_log(fop->xl->name, GF_LOG_ERROR, "Unhandled state %d for %s",
- state, ec_fop_name(fop->id));
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
return EC_STATE_END;
}
}
-void ec_readdir(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_readdir_cbk_t func, void * data,
- fd_t * fd, size_t size, off_t offset, dict_t * xdata)
+void
+ec_readdir(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_readdir_cbk_t func, void *data, fd_t *fd,
+ size_t size, off_t offset, dict_t *xdata)
{
- ec_cbk_t callback = { .readdir = func };
- ec_fop_data_t * fop = NULL;
- int32_t error = EIO;
+ ec_cbk_t callback = {.readdir = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
- gf_log("ec", GF_LOG_TRACE, "EC(READDIR) %p", frame);
+ gf_msg_trace("ec", 0, "EC(READDIR) %p", frame);
VALIDATE_OR_GOTO(this, out);
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_READDIR, 0, target, minimum,
- ec_wind_readdir, ec_manager_readdir, callback,
- data);
- if (fop == NULL)
- {
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_READDIR, EC_FLAG_LOCK_SHARED,
+ target, fop_flags, ec_wind_readdir,
+ ec_manager_readdir, callback, data);
+ if (fop == NULL) {
goto out;
}
@@ -510,24 +545,22 @@ void ec_readdir(call_frame_t * frame, xlator_t * this, uintptr_t target,
fop->size = size;
fop->offset = offset;
- if (fd != NULL)
- {
+ if (fd != NULL) {
fop->fd = fd_ref(fd);
- if (fop->fd == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "file descriptor.");
+ if (fop->fd == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL,
+ "Failed to reference a "
+ "file descriptor.");
goto out;
}
}
- if (xdata != NULL)
- {
+ if (xdata != NULL) {
fop->xdata = dict_ref(xdata);
- if (fop->xdata == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
@@ -536,86 +569,44 @@ void ec_readdir(call_frame_t * frame, xlator_t * this, uintptr_t target,
error = 0;
out:
- if (fop != NULL)
- {
+ if (fop != NULL) {
ec_manager(fop, error);
- }
- else
- {
- func(frame, NULL, this, -1, EIO, NULL, NULL);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, NULL);
}
}
/* FOP: readdirp */
-int32_t ec_readdirp_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
- int32_t op_ret, int32_t op_errno,
- gf_dirent_t * entries, dict_t * xdata)
-{
- ec_fop_data_t * fop = NULL;
- int32_t idx = (int32_t)(uintptr_t)cookie;
-
- VALIDATE_OR_GOTO(this, out);
- GF_VALIDATE_OR_GOTO(this->name, frame, out);
- GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
- GF_VALIDATE_OR_GOTO(this->name, this->private, out);
-
- fop = frame->local;
-
- ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx,
- frame, op_ret, op_errno);
-
- if (op_ret > 0)
- {
- ec_adjust_readdir(fop->xl->private, idx, entries);
- }
-
- if (!ec_dispatch_one_retry(fop, idx, op_ret, op_errno))
- {
- if (fop->cbks.readdirp != NULL)
- {
- fop->cbks.readdirp(fop->req_frame, fop, this, op_ret, op_errno,
- entries, xdata);
- }
- }
-
-out:
- if (fop != NULL)
- {
- ec_complete(fop);
- }
-
- return 0;
-}
-
-void ec_wind_readdirp(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+void
+ec_wind_readdirp(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
{
ec_trace("WIND", fop, "idx=%d", idx);
- STACK_WIND_COOKIE(fop->frame, ec_readdirp_cbk, (void *)(uintptr_t)idx,
+ STACK_WIND_COOKIE(fop->frame, ec_common_readdir_cbk, (void *)(uintptr_t)idx,
ec->xl_list[idx], ec->xl_list[idx]->fops->readdirp,
fop->fd, fop->size, fop->offset, fop->xdata);
}
-void ec_readdirp(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_readdirp_cbk_t func, void * data,
- fd_t * fd, size_t size, off_t offset, dict_t * xdata)
+void
+ec_readdirp(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_readdirp_cbk_t func, void *data, fd_t *fd,
+ size_t size, off_t offset, dict_t *xdata)
{
- ec_cbk_t callback = { .readdirp = func };
- ec_fop_data_t * fop = NULL;
- int32_t error = EIO;
+ ec_cbk_t callback = {.readdirp = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
- gf_log("ec", GF_LOG_TRACE, "EC(READDIRP) %p", frame);
+ gf_msg_trace("ec", 0, "EC(READDIRP) %p", frame);
VALIDATE_OR_GOTO(this, out);
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_READDIRP, 0, target,
- minimum, ec_wind_readdirp, ec_manager_readdir,
- callback, data);
- if (fop == NULL)
- {
+ fop = ec_fop_data_allocate(
+ frame, this, GF_FOP_READDIRP, EC_FLAG_LOCK_SHARED, target, fop_flags,
+ ec_wind_readdirp, ec_manager_readdir, callback, data);
+ if (fop == NULL) {
goto out;
}
@@ -624,24 +615,22 @@ void ec_readdirp(call_frame_t * frame, xlator_t * this, uintptr_t target,
fop->size = size;
fop->offset = offset;
- if (fd != NULL)
- {
+ if (fd != NULL) {
fop->fd = fd_ref(fd);
- if (fop->fd == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "file descriptor.");
+ if (fop->fd == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL,
+ "Failed to reference a "
+ "file descriptor.");
goto out;
}
}
- if (xdata != NULL)
- {
+ if (xdata != NULL) {
fop->xdata = dict_ref(xdata);
- if (fop->xdata == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
@@ -650,12 +639,9 @@ void ec_readdirp(call_frame_t * frame, xlator_t * this, uintptr_t target,
error = 0;
out:
- if (fop != NULL)
- {
+ if (fop != NULL) {
ec_manager(fop, error);
- }
- else
- {
- func(frame, NULL, this, -1, EIO, NULL, NULL);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, NULL);
}
}
diff --git a/xlators/cluster/ec/src/ec-dir-write.c b/xlators/cluster/ec/src/ec-dir-write.c
index 866f5120bf6..53d27d895c3 100644
--- a/xlators/cluster/ec/src/ec-dir-write.c
+++ b/xlators/cluster/ec/src/ec-dir-write.c
@@ -8,9 +8,8 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
-
+#include "ec.h"
+#include "ec-messages.h"
#include "ec-helpers.h"
#include "ec-common.h"
#include "ec-combine.h"
@@ -18,75 +17,75 @@
#include "ec-fops.h"
int
-ec_dir_write_cbk (call_frame_t *frame, xlator_t *this,
- void *cookie, int op_ret, int op_errno,
- struct iatt *poststat, struct iatt *preparent,
- struct iatt *postparent, struct iatt *preparent2,
- struct iatt *postparent2, dict_t *xdata)
+ec_dir_write_cbk(call_frame_t *frame, xlator_t *this, void *cookie, int op_ret,
+ int op_errno, struct iatt *poststat, struct iatt *preparent,
+ struct iatt *postparent, struct iatt *preparent2,
+ struct iatt *postparent2, dict_t *xdata)
{
- ec_fop_data_t *fop = NULL;
- ec_cbk_data_t *cbk = NULL;
- int i = 0;
- int idx = 0;
+ ec_fop_data_t *fop = NULL;
+ ec_cbk_data_t *cbk = NULL;
+ int i = 0;
+ int idx = 0;
- VALIDATE_OR_GOTO (this, out);
- GF_VALIDATE_OR_GOTO (this->name, frame, out);
- GF_VALIDATE_OR_GOTO (this->name, frame->local, out);
- GF_VALIDATE_OR_GOTO (this->name, this->private, out);
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = frame->local;
- idx = (long) cookie;
+ fop = frame->local;
+ idx = (long)cookie;
- ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx,
- frame, op_ret, op_errno);
+ ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame,
+ op_ret, op_errno);
- cbk = ec_cbk_data_allocate (frame, this, fop, fop->id, idx, op_ret,
- op_errno);
- if (!cbk)
- goto out;
+ cbk = ec_cbk_data_allocate(frame, this, fop, fop->id, idx, op_ret,
+ op_errno);
+ if (!cbk)
+ goto out;
- if (op_ret < 0)
- goto out;
+ if (xdata)
+ cbk->xdata = dict_ref(xdata);
- if (xdata)
- cbk->xdata = dict_ref (xdata);
+ if (op_ret < 0)
+ goto out;
- if (poststat)
- cbk->iatt[i++] = *poststat;
+ if (poststat)
+ cbk->iatt[i++] = *poststat;
- if (preparent)
- cbk->iatt[i++] = *preparent;
+ if (preparent)
+ cbk->iatt[i++] = *preparent;
- if (postparent)
- cbk->iatt[i++] = *postparent;
+ if (postparent)
+ cbk->iatt[i++] = *postparent;
- if (preparent2)
- cbk->iatt[i++] = *preparent2;
+ if (preparent2)
+ cbk->iatt[i++] = *preparent2;
- if (postparent2)
- cbk->iatt[i++] = *postparent2;
+ if (postparent2)
+ cbk->iatt[i++] = *postparent2;
out:
- if (cbk)
- ec_combine (cbk, ec_combine_write);
- if (fop)
- ec_complete (fop);
- return 0;
+ if (cbk)
+ ec_combine(cbk, ec_combine_write);
+
+ if (fop)
+ ec_complete(fop);
+ return 0;
}
/* FOP: create */
-int32_t ec_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd,
- inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
+int32_t
+ec_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, fd_t *fd, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
- return ec_dir_write_cbk (frame, this, cookie, op_ret, op_errno,
- buf, preparent, postparent, NULL, NULL, xdata);
+ return ec_dir_write_cbk(frame, this, cookie, op_ret, op_errno, buf,
+ preparent, postparent, NULL, NULL, xdata);
}
-void ec_wind_create(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+void
+ec_wind_create(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
{
ec_trace("WIND", fop, "idx=%d", idx);
@@ -96,25 +95,33 @@ void ec_wind_create(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
fop->fd, fop->xdata);
}
-int32_t ec_manager_create(ec_fop_data_t * fop, int32_t state)
+int32_t
+ec_manager_create(ec_fop_data_t *fop, int32_t state)
{
+ ec_config_t config;
+ ec_t *ec;
+ ec_cbk_data_t *cbk;
+ ec_fd_t *ctx;
+ uint64_t version[2] = {0, 0};
+ int32_t err;
-
- ec_t * ec;
- ec_cbk_data_t * cbk;
- ec_fd_t * ctx;
-
- switch (state)
- {
+ switch (state) {
case EC_STATE_INIT:
LOCK(&fop->fd->lock);
ctx = __ec_fd_get(fop->fd, fop->xl);
- if ((ctx == NULL) ||
- (ec_loc_from_loc(fop->xl, &ctx->loc, &fop->loc[0])) != 0) {
+ if (ctx == NULL) {
UNLOCK(&fop->fd->lock);
- fop->error = EIO;
+ fop->error = ENOMEM;
+
+ return EC_STATE_REPORT;
+ }
+ err = ec_loc_from_loc(fop->xl, &ctx->loc, &fop->loc[0]);
+ if (err != 0) {
+ UNLOCK(&fop->fd->lock);
+
+ fop->error = -err;
return EC_STATE_REPORT;
}
@@ -123,12 +130,10 @@ int32_t ec_manager_create(ec_fop_data_t * fop, int32_t state)
UNLOCK(&fop->fd->lock);
- if (fop->xdata == NULL)
- {
+ if (fop->xdata == NULL) {
fop->xdata = dict_new();
- if (fop->xdata == NULL)
- {
- fop->error = EIO;
+ if (fop->xdata == NULL) {
+ fop->error = ENOMEM;
return EC_STATE_REPORT;
}
@@ -136,29 +141,29 @@ int32_t ec_manager_create(ec_fop_data_t * fop, int32_t state)
ec = fop->xl->private;
- fop->config.version = EC_CONFIG_VERSION;
- fop->config.algorithm = EC_CONFIG_ALGORITHM;
- fop->config.gf_word_size = EC_GF_BITS;
- fop->config.bricks = ec->nodes;
- fop->config.redundancy = ec->redundancy;
- fop->config.chunk_size = EC_METHOD_CHUNK_SIZE;
+ config.version = EC_CONFIG_VERSION;
+ config.algorithm = EC_CONFIG_ALGORITHM;
+ config.gf_word_size = EC_GF_BITS;
+ config.bricks = ec->nodes;
+ config.redundancy = ec->redundancy;
+ config.chunk_size = EC_METHOD_CHUNK_SIZE;
- if (ec_dict_set_config(fop->xdata, EC_XATTR_CONFIG,
- &fop->config) < 0)
- {
- fop->error = EIO;
+ err = ec_dict_set_config(fop->xdata, EC_XATTR_CONFIG, &config);
+ if (err != 0) {
+ fop->error = -err;
return EC_STATE_REPORT;
}
-
- if (ec_dict_set_number(fop->xdata, EC_XATTR_VERSION, 0) != 0) {
- fop->error = EIO;
+ err = ec_dict_set_array(fop->xdata, EC_XATTR_VERSION, version,
+ EC_VERSION_SIZE);
+ if (err != 0) {
+ fop->error = -err;
return EC_STATE_REPORT;
}
-
- if (ec_dict_set_number(fop->xdata, EC_XATTR_SIZE, 0) != 0) {
- fop->error = EIO;
+ err = ec_dict_set_number(fop->xdata, EC_XATTR_SIZE, 0);
+ if (err != 0) {
+ fop->error = -err;
return EC_STATE_REPORT;
}
@@ -167,10 +172,11 @@ int32_t ec_manager_create(ec_fop_data_t * fop, int32_t state)
* need to remove O_APPEND from flags (if present) */
fop->int32 &= ~O_APPEND;
- /* Fall through */
+ /* Fall through */
case EC_STATE_LOCK:
- ec_lock_prepare_entry(fop, &fop->loc[0], 1);
+ ec_lock_prepare_parent_inode(fop, &fop->loc[0], NULL,
+ EC_UPDATE_DATA | EC_UPDATE_META);
ec_lock(fop);
return EC_STATE_DISPATCH;
@@ -181,44 +187,25 @@ int32_t ec_manager_create(ec_fop_data_t * fop, int32_t state)
return EC_STATE_PREPARE_ANSWER;
case EC_STATE_PREPARE_ANSWER:
- cbk = fop->answer;
- if (cbk != NULL)
- {
- if (!ec_dict_combine(cbk, EC_COMBINE_XDATA))
- {
- if (cbk->op_ret >= 0)
- {
- cbk->op_ret = -1;
- cbk->op_errno = EIO;
- }
- }
- if (cbk->op_ret >= 0) {
- ec_iatt_rebuild(fop->xl->private, cbk->iatt, 3,
- cbk->count);
-
- if (ec_loc_update(fop->xl, &fop->loc[0], cbk->inode,
- &cbk->iatt[0]) != 0) {
- cbk->op_ret = -1;
- cbk->op_errno = EIO;
- } else {
- LOCK(&fop->fd->lock);
-
- ctx = __ec_fd_get(fop->fd, fop->xl);
- if (ctx != NULL) {
- ctx->open |= cbk->mask;
- }
-
- UNLOCK(&fop->fd->lock);
+ cbk = ec_fop_prepare_answer(fop, _gf_false);
+ if (cbk != NULL) {
+ int32_t err;
+
+ ec_iatt_rebuild(fop->xl->private, cbk->iatt, 3, cbk->count);
+
+ err = ec_loc_update(fop->xl, &fop->loc[0], cbk->inode,
+ &cbk->iatt[0]);
+ if (!ec_cbk_set_error(cbk, -err, _gf_false)) {
+ LOCK(&fop->fd->lock);
+
+ ctx = __ec_fd_get(fop->fd, fop->xl);
+ if (ctx != NULL) {
+ ctx->open |= cbk->mask;
}
- }
- if (cbk->op_ret < 0) {
- ec_fop_set_error(fop, cbk->op_errno);
+
+ UNLOCK(&fop->fd->lock);
}
}
- else
- {
- ec_fop_set_error(fop, EIO);
- }
return EC_STATE_REPORT;
@@ -227,12 +214,11 @@ int32_t ec_manager_create(ec_fop_data_t * fop, int32_t state)
GF_ASSERT(cbk != NULL);
- if (fop->cbks.create != NULL)
- {
- fop->cbks.create (fop->req_frame, fop, fop->xl, cbk->op_ret,
- cbk->op_errno, fop->fd, fop->loc[0].inode,
- &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2],
- cbk->xdata);
+ if (fop->cbks.create != NULL) {
+ QUORUM_CBK(fop->cbks.create, fop, fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno, fop->fd,
+ fop->loc[0].inode, &cbk->iatt[0], &cbk->iatt[1],
+ &cbk->iatt[2], cbk->xdata);
}
return EC_STATE_LOCK_REUSE;
@@ -244,8 +230,7 @@ int32_t ec_manager_create(ec_fop_data_t * fop, int32_t state)
case -EC_STATE_REPORT:
GF_ASSERT(fop->error != 0);
- if (fop->cbks.create != NULL)
- {
+ if (fop->cbks.create != NULL) {
fop->cbks.create(fop->req_frame, fop, fop->xl, -1, fop->error,
NULL, NULL, NULL, NULL, NULL, NULL);
}
@@ -265,35 +250,32 @@ int32_t ec_manager_create(ec_fop_data_t * fop, int32_t state)
return EC_STATE_END;
default:
- gf_log(fop->xl->name, GF_LOG_ERROR, "Unhandled state %d for %s",
- state, ec_fop_name(fop->id));
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
return EC_STATE_END;
}
}
-void ec_create(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_create_cbk_t func, void * data,
- loc_t * loc, int32_t flags, mode_t mode, mode_t umask,
- fd_t * fd, dict_t * xdata)
+void
+ec_create(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_create_cbk_t func, void *data, loc_t *loc,
+ int32_t flags, mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
{
- ec_cbk_t callback = { .create = func };
- ec_fop_data_t * fop = NULL;
- int32_t error = EIO;
+ ec_cbk_t callback = {.create = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
- gf_log("ec", GF_LOG_TRACE, "EC(CREATE) %p", frame);
+ gf_msg_trace("ec", 0, "EC(CREATE) %p", frame);
VALIDATE_OR_GOTO(this, out);
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_CREATE,
- EC_FLAG_UPDATE_LOC_PARENT |
- EC_FLAG_UPDATE_FD_INODE, target, minimum,
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_CREATE, 0, target, fop_flags,
ec_wind_create, ec_manager_create, callback,
data);
- if (fop == NULL)
- {
+ if (fop == NULL) {
goto out;
}
@@ -301,33 +283,30 @@ void ec_create(call_frame_t * frame, xlator_t * this, uintptr_t target,
fop->mode[0] = mode;
fop->mode[1] = umask;
- if (loc != NULL)
- {
- if (loc_copy(&fop->loc[0], loc) != 0)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to copy a location.");
+ if (loc != NULL) {
+ if (loc_copy(&fop->loc[0], loc) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL,
+ "Failed to copy a location.");
goto out;
}
}
- if (fd != NULL)
- {
+ if (fd != NULL) {
fop->fd = fd_ref(fd);
- if (fop->fd == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "file descriptor.");
+ if (fop->fd == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL,
+ "Failed to reference a "
+ "file descriptor.");
goto out;
}
}
- if (xdata != NULL)
- {
- fop->xdata = dict_ref(xdata);
- if (fop->xdata == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (xdata != NULL) {
+ fop->xdata = dict_copy_with_ref(xdata, NULL);
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
@@ -336,28 +315,26 @@ void ec_create(call_frame_t * frame, xlator_t * this, uintptr_t target,
error = 0;
out:
- if (fop != NULL)
- {
+ if (fop != NULL) {
ec_manager(fop, error);
- }
- else
- {
- func(frame, NULL, this, -1, EIO, NULL, NULL, NULL, NULL, NULL, NULL);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, NULL, NULL, NULL, NULL, NULL);
}
}
/* FOP: link */
-int32_t ec_link_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
- int32_t op_ret, int32_t op_errno, inode_t * inode,
- struct iatt * buf, struct iatt * preparent,
- struct iatt * postparent, dict_t * xdata)
+int32_t
+ec_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
- return ec_dir_write_cbk (frame, this, cookie, op_ret, op_errno,
- buf, preparent, postparent, NULL, NULL, xdata);
+ return ec_dir_write_cbk(frame, this, cookie, op_ret, op_errno, buf,
+ preparent, postparent, NULL, NULL, xdata);
}
-void ec_wind_link(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+void
+ec_wind_link(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
{
ec_trace("WIND", fop, "idx=%d", idx);
@@ -366,25 +343,19 @@ void ec_wind_link(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
&fop->loc[0], &fop->loc[1], fop->xdata);
}
-int32_t ec_manager_link(ec_fop_data_t * fop, int32_t state)
+int32_t
+ec_manager_link(ec_fop_data_t *fop, int32_t state)
{
- ec_cbk_data_t * cbk;
+ ec_cbk_data_t *cbk;
- switch (state)
- {
+ switch (state) {
case EC_STATE_INIT:
case EC_STATE_LOCK:
- // Parent entry of fop->loc[0] should be locked, but I don't
- // receive enough information to do it (fop->loc[0].parent is
- // NULL).
- ec_lock_prepare_entry(fop, &fop->loc[1], 1);
+ ec_lock_prepare_parent_inode(
+ fop, &fop->loc[1], &fop->loc[0],
+ EC_UPDATE_DATA | EC_UPDATE_META | EC_INODE_SIZE);
ec_lock(fop);
- return EC_STATE_GET_SIZE_AND_VERSION;
-
- case EC_STATE_GET_SIZE_AND_VERSION:
- ec_get_size_version(fop);
-
return EC_STATE_DISPATCH;
case EC_STATE_DISPATCH:
@@ -393,37 +364,19 @@ int32_t ec_manager_link(ec_fop_data_t * fop, int32_t state)
return EC_STATE_PREPARE_ANSWER;
case EC_STATE_PREPARE_ANSWER:
- cbk = fop->answer;
- if (cbk != NULL)
- {
- if (!ec_dict_combine(cbk, EC_COMBINE_XDATA))
- {
- if (cbk->op_ret >= 0)
- {
- cbk->op_ret = -1;
- cbk->op_errno = EIO;
- }
- }
- if (cbk->op_ret >= 0) {
- ec_iatt_rebuild(fop->xl->private, cbk->iatt, 3,
- cbk->count);
- if (cbk->iatt[0].ia_type == IA_IFREG) {
- cbk->iatt[0].ia_size = fop->pre_size;
- }
+ cbk = ec_fop_prepare_answer(fop, _gf_false);
+ if (cbk != NULL) {
+ int32_t err;
- if (ec_loc_update(fop->xl, &fop->loc[0], cbk->inode,
- &cbk->iatt[0]) != 0) {
- cbk->op_ret = -1;
- cbk->op_errno = EIO;
- }
- }
- if (cbk->op_ret < 0) {
- ec_fop_set_error(fop, cbk->op_errno);
+ ec_iatt_rebuild(fop->xl->private, cbk->iatt, 3, cbk->count);
+
+ if (cbk->iatt[0].ia_type == IA_IFREG) {
+ cbk->iatt[0].ia_size = fop->locks[0].size;
}
- }
- else
- {
- ec_fop_set_error(fop, EIO);
+
+ err = ec_loc_update(fop->xl, &fop->loc[0], cbk->inode,
+ &cbk->iatt[0]);
+ ec_cbk_set_error(cbk, -err, _gf_false);
}
return EC_STATE_REPORT;
@@ -433,25 +386,23 @@ int32_t ec_manager_link(ec_fop_data_t * fop, int32_t state)
GF_ASSERT(cbk != NULL);
- if (fop->cbks.link != NULL)
- {
- fop->cbks.link(fop->req_frame, fop, fop->xl, cbk->op_ret,
- cbk->op_errno, fop->loc[0].inode, &cbk->iatt[0],
- &cbk->iatt[1], &cbk->iatt[2], cbk->xdata);
+ if (fop->cbks.link != NULL) {
+ QUORUM_CBK(fop->cbks.link, fop, fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno, fop->loc[0].inode,
+ &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2],
+ cbk->xdata);
}
return EC_STATE_LOCK_REUSE;
case -EC_STATE_INIT:
case -EC_STATE_LOCK:
- case -EC_STATE_GET_SIZE_AND_VERSION:
case -EC_STATE_DISPATCH:
case -EC_STATE_PREPARE_ANSWER:
case -EC_STATE_REPORT:
GF_ASSERT(fop->error != 0);
- if (fop->cbks.link != NULL)
- {
+ if (fop->cbks.link != NULL) {
fop->cbks.link(fop->req_frame, fop, fop->xl, -1, fop->error,
NULL, NULL, NULL, NULL, NULL);
}
@@ -471,59 +422,56 @@ int32_t ec_manager_link(ec_fop_data_t * fop, int32_t state)
return EC_STATE_END;
default:
- gf_log(fop->xl->name, GF_LOG_ERROR, "Unhandled state %d for %s",
- state, ec_fop_name(fop->id));
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
return EC_STATE_END;
}
}
-void ec_link(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_link_cbk_t func, void * data, loc_t * oldloc,
- loc_t * newloc, dict_t * xdata)
+void
+ec_link(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_link_cbk_t func, void *data, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata)
{
- ec_cbk_t callback = { .link = func };
- ec_fop_data_t * fop = NULL;
- int32_t error = EIO;
+ ec_cbk_t callback = {.link = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
- gf_log("ec", GF_LOG_TRACE, "EC(LINK) %p", frame);
+ gf_msg_trace("ec", 0, "EC(LINK) %p", frame);
VALIDATE_OR_GOTO(this, out);
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_LINK, 0, target, minimum,
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_LINK, 0, target, fop_flags,
ec_wind_link, ec_manager_link, callback, data);
- if (fop == NULL)
- {
+ if (fop == NULL) {
goto out;
}
- if (oldloc != NULL)
- {
- if (loc_copy(&fop->loc[0], oldloc) != 0)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to copy a location.");
+ if (oldloc != NULL) {
+ if (loc_copy(&fop->loc[0], oldloc) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL,
+ "Failed to copy a location.");
goto out;
}
}
- if (newloc != NULL)
- {
- if (loc_copy(&fop->loc[1], newloc) != 0)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to copy a location.");
+ if (newloc != NULL) {
+ if (loc_copy(&fop->loc[1], newloc) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL,
+ "Failed to copy a location.");
goto out;
}
}
- if (xdata != NULL)
- {
- fop->xdata = dict_ref(xdata);
- if (fop->xdata == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (xdata != NULL) {
+ fop->xdata = dict_copy_with_ref(xdata, NULL);
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
@@ -532,28 +480,26 @@ void ec_link(call_frame_t * frame, xlator_t * this, uintptr_t target,
error = 0;
out:
- if (fop != NULL)
- {
+ if (fop != NULL) {
ec_manager(fop, error);
- }
- else
- {
- func(frame, NULL, this, -1, EIO, NULL, NULL, NULL, NULL, NULL);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, NULL, NULL, NULL, NULL);
}
}
/* FOP: mkdir */
-int32_t ec_mkdir_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
- int32_t op_ret, int32_t op_errno, inode_t * inode,
- struct iatt * buf, struct iatt * preparent,
- struct iatt * postparent, dict_t * xdata)
+int32_t
+ec_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
- return ec_dir_write_cbk (frame, this, cookie, op_ret, op_errno,
- buf, preparent, postparent, NULL, NULL, xdata);
+ return ec_dir_write_cbk(frame, this, cookie, op_ret, op_errno, buf,
+ preparent, postparent, NULL, NULL, xdata);
}
-void ec_wind_mkdir(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+void
+ec_wind_mkdir(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
{
ec_trace("WIND", fop, "idx=%d", idx);
@@ -562,32 +508,36 @@ void ec_wind_mkdir(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
&fop->loc[0], fop->mode[0], fop->mode[1], fop->xdata);
}
-int32_t ec_manager_mkdir(ec_fop_data_t * fop, int32_t state)
+int32_t
+ec_manager_mkdir(ec_fop_data_t *fop, int32_t state)
{
- ec_cbk_data_t * cbk;
+ ec_cbk_data_t *cbk;
+ uint64_t version[2] = {0, 0};
+ int32_t err;
- switch (state)
- {
+ switch (state) {
case EC_STATE_INIT:
if (fop->xdata == NULL) {
fop->xdata = dict_new();
if (fop->xdata == NULL) {
- fop->error = EIO;
+ fop->error = ENOMEM;
return EC_STATE_REPORT;
}
}
- if (ec_dict_set_number(fop->xdata, EC_XATTR_VERSION, 0) != 0) {
- fop->error = EIO;
-
+ err = ec_dict_set_array(fop->xdata, EC_XATTR_VERSION, version,
+ EC_VERSION_SIZE);
+ if (err != 0) {
+ fop->error = -err;
return EC_STATE_REPORT;
}
- /* Fall through */
+ /* Fall through */
case EC_STATE_LOCK:
- ec_lock_prepare_entry(fop, &fop->loc[0], 1);
+ ec_lock_prepare_parent_inode(fop, &fop->loc[0], NULL,
+ EC_UPDATE_DATA | EC_UPDATE_META);
ec_lock(fop);
return EC_STATE_DISPATCH;
@@ -598,34 +548,15 @@ int32_t ec_manager_mkdir(ec_fop_data_t * fop, int32_t state)
return EC_STATE_PREPARE_ANSWER;
case EC_STATE_PREPARE_ANSWER:
- cbk = fop->answer;
- if (cbk != NULL)
- {
- if (!ec_dict_combine(cbk, EC_COMBINE_XDATA))
- {
- if (cbk->op_ret >= 0)
- {
- cbk->op_ret = -1;
- cbk->op_errno = EIO;
- }
- }
- if (cbk->op_ret >= 0) {
- ec_iatt_rebuild(fop->xl->private, cbk->iatt, 3,
- cbk->count);
-
- if (ec_loc_update(fop->xl, &fop->loc[0], cbk->inode,
- &cbk->iatt[0]) != 0) {
- cbk->op_ret = -1;
- cbk->op_errno = EIO;
- }
- }
- if (cbk->op_ret < 0) {
- ec_fop_set_error(fop, cbk->op_errno);
- }
- }
- else
- {
- ec_fop_set_error(fop, EIO);
+ cbk = ec_fop_prepare_answer(fop, _gf_false);
+ if (cbk != NULL) {
+ int32_t err;
+
+ ec_iatt_rebuild(fop->xl->private, cbk->iatt, 3, cbk->count);
+
+ err = ec_loc_update(fop->xl, &fop->loc[0], cbk->inode,
+ &cbk->iatt[0]);
+ ec_cbk_set_error(cbk, -err, _gf_false);
}
return EC_STATE_REPORT;
@@ -635,11 +566,11 @@ int32_t ec_manager_mkdir(ec_fop_data_t * fop, int32_t state)
GF_ASSERT(cbk != NULL);
- if (fop->cbks.mkdir != NULL)
- {
- fop->cbks.mkdir(fop->req_frame, fop, fop->xl, cbk->op_ret,
- cbk->op_errno, fop->loc[0].inode, &cbk->iatt[0],
- &cbk->iatt[1], &cbk->iatt[2], cbk->xdata);
+ if (fop->cbks.mkdir != NULL) {
+ QUORUM_CBK(fop->cbks.mkdir, fop, fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno, fop->loc[0].inode,
+ &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2],
+ cbk->xdata);
}
return EC_STATE_LOCK_REUSE;
@@ -649,12 +580,13 @@ int32_t ec_manager_mkdir(ec_fop_data_t * fop, int32_t state)
case -EC_STATE_DISPATCH:
case -EC_STATE_PREPARE_ANSWER:
case -EC_STATE_REPORT:
+ cbk = fop->answer;
GF_ASSERT(fop->error != 0);
- if (fop->cbks.mkdir != NULL)
- {
+ if (fop->cbks.mkdir != NULL) {
fop->cbks.mkdir(fop->req_frame, fop, fop->xl, -1, fop->error,
- NULL, NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL,
+ ((cbk) ? cbk->xdata : NULL));
}
return EC_STATE_LOCK_REUSE;
@@ -672,55 +604,51 @@ int32_t ec_manager_mkdir(ec_fop_data_t * fop, int32_t state)
return EC_STATE_END;
default:
- gf_log(fop->xl->name, GF_LOG_ERROR, "Unhandled state %d for %s",
- state, ec_fop_name(fop->id));
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
return EC_STATE_END;
}
}
-void ec_mkdir(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_mkdir_cbk_t func, void * data, loc_t * loc,
- mode_t mode, mode_t umask, dict_t * xdata)
+void
+ec_mkdir(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_mkdir_cbk_t func, void *data, loc_t *loc,
+ mode_t mode, mode_t umask, dict_t *xdata)
{
- ec_cbk_t callback = { .mkdir = func };
- ec_fop_data_t * fop = NULL;
- int32_t error = EIO;
+ ec_cbk_t callback = {.mkdir = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
- gf_log("ec", GF_LOG_TRACE, "EC(MKDIR) %p", frame);
+ gf_msg_trace("ec", 0, "EC(MKDIR) %p", frame);
VALIDATE_OR_GOTO(this, out);
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_MKDIR,
- EC_FLAG_UPDATE_LOC_PARENT, target, minimum,
- ec_wind_mkdir, ec_manager_mkdir, callback,
- data);
- if (fop == NULL)
- {
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_MKDIR, 0, target, fop_flags,
+ ec_wind_mkdir, ec_manager_mkdir, callback, data);
+ if (fop == NULL) {
goto out;
}
fop->mode[0] = mode;
fop->mode[1] = umask;
- if (loc != NULL)
- {
- if (loc_copy(&fop->loc[0], loc) != 0)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to copy a location.");
+ if (loc != NULL) {
+ if (loc_copy(&fop->loc[0], loc) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL,
+ "Failed to copy a location.");
goto out;
}
}
- if (xdata != NULL)
- {
- fop->xdata = dict_ref(xdata);
- if (fop->xdata == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (xdata != NULL) {
+ fop->xdata = dict_copy_with_ref(xdata, NULL);
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
@@ -729,28 +657,26 @@ void ec_mkdir(call_frame_t * frame, xlator_t * this, uintptr_t target,
error = 0;
out:
- if (fop != NULL)
- {
+ if (fop != NULL) {
ec_manager(fop, error);
- }
- else
- {
- func(frame, NULL, this, -1, EIO, NULL, NULL, NULL, NULL, NULL);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, NULL, NULL, NULL, NULL);
}
}
/* FOP: mknod */
-int32_t ec_mknod_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
- int32_t op_ret, int32_t op_errno, inode_t * inode,
- struct iatt * buf, struct iatt * preparent,
- struct iatt * postparent, dict_t * xdata)
+int32_t
+ec_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
- return ec_dir_write_cbk (frame, this, cookie, op_ret, op_errno,
- buf, preparent, postparent, NULL, NULL, xdata);
+ return ec_dir_write_cbk(frame, this, cookie, op_ret, op_errno, buf,
+ preparent, postparent, NULL, NULL, xdata);
}
-void ec_wind_mknod(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+void
+ec_wind_mknod(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
{
ec_trace("WIND", fop, "idx=%d", idx);
@@ -760,19 +686,23 @@ void ec_wind_mknod(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
fop->xdata);
}
-int32_t ec_manager_mknod(ec_fop_data_t * fop, int32_t state)
+int32_t
+ec_manager_mknod(ec_fop_data_t *fop, int32_t state)
{
+ ec_config_t config;
ec_t *ec;
- ec_cbk_data_t * cbk;
+ ec_cbk_data_t *cbk;
+ uint64_t version[2] = {0, 0};
- switch (state)
- {
+ switch (state) {
case EC_STATE_INIT:
if (S_ISREG(fop->mode[0])) {
+ int32_t err;
+
if (fop->xdata == NULL) {
fop->xdata = dict_new();
if (fop->xdata == NULL) {
- fop->error = EIO;
+ fop->error = ENOMEM;
return EC_STATE_REPORT;
}
@@ -780,37 +710,39 @@ int32_t ec_manager_mknod(ec_fop_data_t * fop, int32_t state)
ec = fop->xl->private;
- fop->config.version = EC_CONFIG_VERSION;
- fop->config.algorithm = EC_CONFIG_ALGORITHM;
- fop->config.gf_word_size = EC_GF_BITS;
- fop->config.bricks = ec->nodes;
- fop->config.redundancy = ec->redundancy;
- fop->config.chunk_size = EC_METHOD_CHUNK_SIZE;
+ config.version = EC_CONFIG_VERSION;
+ config.algorithm = EC_CONFIG_ALGORITHM;
+ config.gf_word_size = EC_GF_BITS;
+ config.bricks = ec->nodes;
+ config.redundancy = ec->redundancy;
+ config.chunk_size = EC_METHOD_CHUNK_SIZE;
- if (ec_dict_set_config(fop->xdata, EC_XATTR_CONFIG,
- &fop->config) < 0) {
- fop->error = EIO;
+ err = ec_dict_set_config(fop->xdata, EC_XATTR_CONFIG, &config);
+ if (err != 0) {
+ fop->error = -err;
return EC_STATE_REPORT;
}
-
- if (ec_dict_set_number(fop->xdata, EC_XATTR_VERSION, 0) != 0) {
- fop->error = EIO;
+ err = ec_dict_set_array(fop->xdata, EC_XATTR_VERSION, version,
+ EC_VERSION_SIZE);
+ if (err != 0) {
+ fop->error = -err;
return EC_STATE_REPORT;
}
-
- if (ec_dict_set_number(fop->xdata, EC_XATTR_SIZE, 0) != 0) {
- fop->error = EIO;
+ err = ec_dict_set_number(fop->xdata, EC_XATTR_SIZE, 0);
+ if (err != 0) {
+ fop->error = -err;
return EC_STATE_REPORT;
}
}
- /* Fall through */
+ /* Fall through */
case EC_STATE_LOCK:
- ec_lock_prepare_entry(fop, &fop->loc[0], 1);
+ ec_lock_prepare_parent_inode(fop, &fop->loc[0], NULL,
+ EC_UPDATE_DATA | EC_UPDATE_META);
ec_lock(fop);
return EC_STATE_DISPATCH;
@@ -821,34 +753,15 @@ int32_t ec_manager_mknod(ec_fop_data_t * fop, int32_t state)
return EC_STATE_PREPARE_ANSWER;
case EC_STATE_PREPARE_ANSWER:
- cbk = fop->answer;
- if (cbk != NULL)
- {
- if (!ec_dict_combine(cbk, EC_COMBINE_XDATA))
- {
- if (cbk->op_ret >= 0)
- {
- cbk->op_ret = -1;
- cbk->op_errno = EIO;
- }
- }
- if (cbk->op_ret >= 0) {
- ec_iatt_rebuild(fop->xl->private, cbk->iatt, 3,
- cbk->count);
-
- if (ec_loc_update(fop->xl, &fop->loc[0], cbk->inode,
- &cbk->iatt[0]) != 0) {
- cbk->op_ret = -1;
- cbk->op_errno = EIO;
- }
- }
- if (cbk->op_ret < 0) {
- ec_fop_set_error(fop, cbk->op_errno);
- }
- }
- else
- {
- ec_fop_set_error(fop, EIO);
+ cbk = ec_fop_prepare_answer(fop, _gf_false);
+ if (cbk != NULL) {
+ int32_t err;
+
+ ec_iatt_rebuild(fop->xl->private, cbk->iatt, 3, cbk->count);
+
+ err = ec_loc_update(fop->xl, &fop->loc[0], cbk->inode,
+ &cbk->iatt[0]);
+ ec_cbk_set_error(cbk, -err, _gf_false);
}
return EC_STATE_REPORT;
@@ -858,11 +771,11 @@ int32_t ec_manager_mknod(ec_fop_data_t * fop, int32_t state)
GF_ASSERT(cbk != NULL);
- if (fop->cbks.mknod != NULL)
- {
- fop->cbks.mknod(fop->req_frame, fop, fop->xl, cbk->op_ret,
- cbk->op_errno, fop->loc[0].inode, &cbk->iatt[0],
- &cbk->iatt[1], &cbk->iatt[2], cbk->xdata);
+ if (fop->cbks.mknod != NULL) {
+ QUORUM_CBK(fop->cbks.mknod, fop, fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno, fop->loc[0].inode,
+ &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2],
+ cbk->xdata);
}
return EC_STATE_LOCK_REUSE;
@@ -874,8 +787,7 @@ int32_t ec_manager_mknod(ec_fop_data_t * fop, int32_t state)
case -EC_STATE_REPORT:
GF_ASSERT(fop->error != 0);
- if (fop->cbks.mknod != NULL)
- {
+ if (fop->cbks.mknod != NULL) {
fop->cbks.mknod(fop->req_frame, fop, fop->xl, -1, fop->error,
NULL, NULL, NULL, NULL, NULL);
}
@@ -895,33 +807,31 @@ int32_t ec_manager_mknod(ec_fop_data_t * fop, int32_t state)
return EC_STATE_END;
default:
- gf_log(fop->xl->name, GF_LOG_ERROR, "Unhandled state %d for %s",
- state, ec_fop_name(fop->id));
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
return EC_STATE_END;
}
}
-void ec_mknod(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_mknod_cbk_t func, void * data, loc_t * loc,
- mode_t mode, dev_t rdev, mode_t umask, dict_t * xdata)
+void
+ec_mknod(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_mknod_cbk_t func, void *data, loc_t *loc,
+ mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata)
{
- ec_cbk_t callback = { .mknod = func };
- ec_fop_data_t * fop = NULL;
- int32_t error = EIO;
+ ec_cbk_t callback = {.mknod = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
- gf_log("ec", GF_LOG_TRACE, "EC(MKNOD) %p", frame);
+ gf_msg_trace("ec", 0, "EC(MKNOD) %p", frame);
VALIDATE_OR_GOTO(this, out);
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_MKNOD,
- EC_FLAG_UPDATE_LOC_PARENT, target, minimum,
- ec_wind_mknod, ec_manager_mknod, callback,
- data);
- if (fop == NULL)
- {
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_MKNOD, 0, target, fop_flags,
+ ec_wind_mknod, ec_manager_mknod, callback, data);
+ if (fop == NULL) {
goto out;
}
@@ -929,22 +839,20 @@ void ec_mknod(call_frame_t * frame, xlator_t * this, uintptr_t target,
fop->dev = rdev;
fop->mode[1] = umask;
- if (loc != NULL)
- {
- if (loc_copy(&fop->loc[0], loc) != 0)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to copy a location.");
+ if (loc != NULL) {
+ if (loc_copy(&fop->loc[0], loc) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL,
+ "Failed to copy a location.");
goto out;
}
}
- if (xdata != NULL)
- {
- fop->xdata = dict_ref(xdata);
- if (fop->xdata == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (xdata != NULL) {
+ fop->xdata = dict_copy_with_ref(xdata, NULL);
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
@@ -953,30 +861,28 @@ void ec_mknod(call_frame_t * frame, xlator_t * this, uintptr_t target,
error = 0;
out:
- if (fop != NULL)
- {
+ if (fop != NULL) {
ec_manager(fop, error);
- }
- else
- {
- func(frame, NULL, this, -1, EIO, NULL, NULL, NULL, NULL, NULL);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, NULL, NULL, NULL, NULL);
}
}
/* FOP: rename */
-int32_t ec_rename_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
- int32_t op_ret, int32_t op_errno, struct iatt * buf,
- struct iatt * preoldparent, struct iatt * postoldparent,
- struct iatt * prenewparent, struct iatt * postnewparent,
- dict_t * xdata)
+int32_t
+ec_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *buf, struct iatt *preoldparent,
+ struct iatt *postoldparent, struct iatt *prenewparent,
+ struct iatt *postnewparent, dict_t *xdata)
{
- return ec_dir_write_cbk (frame, this, cookie, op_ret, op_errno,
- buf, preoldparent, postoldparent, prenewparent,
- postnewparent, xdata);
+ return ec_dir_write_cbk(frame, this, cookie, op_ret, op_errno, buf,
+ preoldparent, postoldparent, prenewparent,
+ postnewparent, xdata);
}
-void ec_wind_rename(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+void
+ec_wind_rename(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
{
ec_trace("WIND", fop, "idx=%d", idx);
@@ -985,23 +891,21 @@ void ec_wind_rename(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
&fop->loc[0], &fop->loc[1], fop->xdata);
}
-int32_t ec_manager_rename(ec_fop_data_t * fop, int32_t state)
+int32_t
+ec_manager_rename(ec_fop_data_t *fop, int32_t state)
{
- ec_cbk_data_t * cbk;
+ ec_cbk_data_t *cbk;
- switch (state)
- {
+ switch (state) {
case EC_STATE_INIT:
case EC_STATE_LOCK:
- ec_lock_prepare_entry(fop, &fop->loc[0], 1);
- ec_lock_prepare_entry(fop, &fop->loc[1], 1);
+ ec_lock_prepare_parent_inode(
+ fop, &fop->loc[0], &fop->loc[0],
+ EC_UPDATE_DATA | EC_UPDATE_META | EC_INODE_SIZE);
+ ec_lock_prepare_parent_inode(fop, &fop->loc[1], NULL,
+ EC_UPDATE_DATA | EC_UPDATE_META);
ec_lock(fop);
- return EC_STATE_GET_SIZE_AND_VERSION;
-
- case EC_STATE_GET_SIZE_AND_VERSION:
- ec_get_size_version(fop);
-
return EC_STATE_DISPATCH;
case EC_STATE_DISPATCH:
@@ -1010,36 +914,14 @@ int32_t ec_manager_rename(ec_fop_data_t * fop, int32_t state)
return EC_STATE_PREPARE_ANSWER;
case EC_STATE_PREPARE_ANSWER:
- cbk = fop->answer;
- if (cbk != NULL)
- {
- if (!ec_dict_combine(cbk, EC_COMBINE_XDATA))
- {
- if (cbk->op_ret >= 0)
- {
- cbk->op_ret = -1;
- cbk->op_errno = EIO;
- }
- }
- if (cbk->op_ret < 0)
- {
- ec_fop_set_error(fop, cbk->op_errno);
- }
- else
- {
- ec_iatt_rebuild(fop->xl->private, cbk->iatt, 5,
- cbk->count);
-
- if (cbk->iatt[0].ia_type == IA_IFREG)
- {
- cbk->iatt[0].ia_size = fop->pre_size;
- }
+ cbk = ec_fop_prepare_answer(fop, _gf_false);
+ if (cbk != NULL) {
+ ec_iatt_rebuild(fop->xl->private, cbk->iatt, 5, cbk->count);
+
+ if (cbk->iatt[0].ia_type == IA_IFREG) {
+ cbk->iatt[0].ia_size = fop->locks[0].size;
}
}
- else
- {
- ec_fop_set_error(fop, EIO);
- }
return EC_STATE_REPORT;
@@ -1048,26 +930,23 @@ int32_t ec_manager_rename(ec_fop_data_t * fop, int32_t state)
GF_ASSERT(cbk != NULL);
- if (fop->cbks.rename != NULL)
- {
- fop->cbks.rename(fop->req_frame, fop, fop->xl, cbk->op_ret,
- cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1],
- &cbk->iatt[2], &cbk->iatt[3], &cbk->iatt[4],
- cbk->xdata);
+ if (fop->cbks.rename != NULL) {
+ QUORUM_CBK(fop->cbks.rename, fop, fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno, &cbk->iatt[0],
+ &cbk->iatt[1], &cbk->iatt[2], &cbk->iatt[3],
+ &cbk->iatt[4], cbk->xdata);
}
return EC_STATE_LOCK_REUSE;
case -EC_STATE_INIT:
case -EC_STATE_LOCK:
- case -EC_STATE_GET_SIZE_AND_VERSION:
case -EC_STATE_DISPATCH:
case -EC_STATE_PREPARE_ANSWER:
case -EC_STATE_REPORT:
GF_ASSERT(fop->error != 0);
- if (fop->cbks.rename != NULL)
- {
+ if (fop->cbks.rename != NULL) {
fop->cbks.rename(fop->req_frame, fop, fop->xl, -1, fop->error,
NULL, NULL, NULL, NULL, NULL, NULL);
}
@@ -1087,61 +966,57 @@ int32_t ec_manager_rename(ec_fop_data_t * fop, int32_t state)
return EC_STATE_END;
default:
- gf_log(fop->xl->name, GF_LOG_ERROR, "Unhandled state %d for %s",
- state, ec_fop_name(fop->id));
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
return EC_STATE_END;
}
}
-void ec_rename(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_rename_cbk_t func, void * data,
- loc_t * oldloc, loc_t * newloc, dict_t * xdata)
+void
+ec_rename(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_rename_cbk_t func, void *data, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata)
{
- ec_cbk_t callback = { .rename = func };
- ec_fop_data_t * fop = NULL;
- int32_t error = EIO;
+ ec_cbk_t callback = {.rename = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
- gf_log("ec", GF_LOG_TRACE, "EC(RENAME) %p", frame);
+ gf_msg_trace("ec", 0, "EC(RENAME) %p", frame);
VALIDATE_OR_GOTO(this, out);
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_RENAME,
- EC_FLAG_UPDATE_LOC_PARENT, target, minimum,
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_RENAME, 0, target, fop_flags,
ec_wind_rename, ec_manager_rename, callback,
data);
- if (fop == NULL)
- {
+ if (fop == NULL) {
goto out;
}
- if (oldloc != NULL)
- {
- if (loc_copy(&fop->loc[0], oldloc) != 0)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to copy a location.");
+ if (oldloc != NULL) {
+ if (loc_copy(&fop->loc[0], oldloc) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL,
+ "Failed to copy a location.");
goto out;
}
}
- if (newloc != NULL)
- {
- if (loc_copy(&fop->loc[1], newloc) != 0)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to copy a location.");
+ if (newloc != NULL) {
+ if (loc_copy(&fop->loc[1], newloc) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL,
+ "Failed to copy a location.");
goto out;
}
}
- if (xdata != NULL)
- {
- fop->xdata = dict_ref(xdata);
- if (fop->xdata == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (xdata != NULL) {
+ fop->xdata = dict_copy_with_ref(xdata, NULL);
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
@@ -1150,27 +1025,26 @@ void ec_rename(call_frame_t * frame, xlator_t * this, uintptr_t target,
error = 0;
out:
- if (fop != NULL)
- {
+ if (fop != NULL) {
ec_manager(fop, error);
- }
- else
- {
- func(frame, NULL, this, -1, EIO, NULL, NULL, NULL, NULL, NULL, NULL);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, NULL, NULL, NULL, NULL, NULL);
}
}
/* FOP: rmdir */
-int32_t ec_rmdir_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
- int32_t op_ret, int32_t op_errno, struct iatt * preparent,
- struct iatt * postparent, dict_t * xdata)
+int32_t
+ec_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- return ec_dir_write_cbk (frame, this, cookie, op_ret, op_errno, NULL,
- preparent, postparent, NULL, NULL, xdata);
+ return ec_dir_write_cbk(frame, this, cookie, op_ret, op_errno, NULL,
+ preparent, postparent, NULL, NULL, xdata);
}
-void ec_wind_rmdir(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+void
+ec_wind_rmdir(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
{
ec_trace("WIND", fop, "idx=%d", idx);
@@ -1179,15 +1053,16 @@ void ec_wind_rmdir(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
&fop->loc[0], fop->int32, fop->xdata);
}
-int32_t ec_manager_rmdir(ec_fop_data_t * fop, int32_t state)
+int32_t
+ec_manager_rmdir(ec_fop_data_t *fop, int32_t state)
{
- ec_cbk_data_t * cbk;
+ ec_cbk_data_t *cbk;
- switch (state)
- {
+ switch (state) {
case EC_STATE_INIT:
case EC_STATE_LOCK:
- ec_lock_prepare_entry(fop, &fop->loc[0], 1);
+ ec_lock_prepare_parent_inode(fop, &fop->loc[0], NULL,
+ EC_UPDATE_DATA | EC_UPDATE_META);
ec_lock(fop);
return EC_STATE_DISPATCH;
@@ -1198,26 +1073,7 @@ int32_t ec_manager_rmdir(ec_fop_data_t * fop, int32_t state)
return EC_STATE_PREPARE_ANSWER;
case EC_STATE_PREPARE_ANSWER:
- cbk = fop->answer;
- if (cbk != NULL)
- {
- if (!ec_dict_combine(cbk, EC_COMBINE_XDATA))
- {
- if (cbk->op_ret >= 0)
- {
- cbk->op_ret = -1;
- cbk->op_errno = EIO;
- }
- }
- if (cbk->op_ret < 0)
- {
- ec_fop_set_error(fop, cbk->op_errno);
- }
- }
- else
- {
- ec_fop_set_error(fop, EIO);
- }
+ ec_fop_prepare_answer(fop, _gf_false);
return EC_STATE_REPORT;
@@ -1226,11 +1082,10 @@ int32_t ec_manager_rmdir(ec_fop_data_t * fop, int32_t state)
GF_ASSERT(cbk != NULL);
- if (fop->cbks.rmdir != NULL)
- {
- fop->cbks.rmdir(fop->req_frame, fop, fop->xl, cbk->op_ret,
- cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1],
- cbk->xdata);
+ if (fop->cbks.rmdir != NULL) {
+ QUORUM_CBK(fop->cbks.rmdir, fop, fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno, &cbk->iatt[0],
+ &cbk->iatt[1], cbk->xdata);
}
return EC_STATE_LOCK_REUSE;
@@ -1242,8 +1097,7 @@ int32_t ec_manager_rmdir(ec_fop_data_t * fop, int32_t state)
case -EC_STATE_REPORT:
GF_ASSERT(fop->error != 0);
- if (fop->cbks.rmdir != NULL)
- {
+ if (fop->cbks.rmdir != NULL) {
fop->cbks.rmdir(fop->req_frame, fop, fop->xl, -1, fop->error,
NULL, NULL, NULL);
}
@@ -1263,54 +1117,50 @@ int32_t ec_manager_rmdir(ec_fop_data_t * fop, int32_t state)
return EC_STATE_END;
default:
- gf_log(fop->xl->name, GF_LOG_ERROR, "Unhandled state %d for %s",
- state, ec_fop_name(fop->id));
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
return EC_STATE_END;
}
}
-void ec_rmdir(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_rmdir_cbk_t func, void * data, loc_t * loc,
- int xflags, dict_t * xdata)
+void
+ec_rmdir(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_rmdir_cbk_t func, void *data, loc_t *loc,
+ int xflags, dict_t *xdata)
{
- ec_cbk_t callback = { .rmdir = func };
- ec_fop_data_t * fop = NULL;
- int32_t error = EIO;
+ ec_cbk_t callback = {.rmdir = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
- gf_log("ec", GF_LOG_TRACE, "EC(RMDIR) %p", frame);
+ gf_msg_trace("ec", 0, "EC(RMDIR) %p", frame);
VALIDATE_OR_GOTO(this, out);
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_RMDIR,
- EC_FLAG_UPDATE_LOC_PARENT, target, minimum,
- ec_wind_rmdir, ec_manager_rmdir, callback,
- data);
- if (fop == NULL)
- {
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_RMDIR, 0, target, fop_flags,
+ ec_wind_rmdir, ec_manager_rmdir, callback, data);
+ if (fop == NULL) {
goto out;
}
fop->int32 = xflags;
- if (loc != NULL)
- {
- if (loc_copy(&fop->loc[0], loc) != 0)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to copy a location.");
+ if (loc != NULL) {
+ if (loc_copy(&fop->loc[0], loc) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL,
+ "Failed to copy a location.");
goto out;
}
}
- if (xdata != NULL)
- {
- fop->xdata = dict_ref(xdata);
- if (fop->xdata == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (xdata != NULL) {
+ fop->xdata = dict_copy_with_ref(xdata, NULL);
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
@@ -1319,28 +1169,27 @@ void ec_rmdir(call_frame_t * frame, xlator_t * this, uintptr_t target,
error = 0;
out:
- if (fop != NULL)
- {
+ if (fop != NULL) {
ec_manager(fop, error);
- }
- else
- {
- func(frame, NULL, this, -1, EIO, NULL, NULL, NULL);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, NULL, NULL);
}
}
/* FOP: symlink */
-int32_t ec_symlink_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
- int32_t op_ret, int32_t op_errno, inode_t * inode,
- struct iatt * buf, struct iatt * preparent,
- struct iatt * postparent, dict_t * xdata)
+int32_t
+ec_symlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- return ec_dir_write_cbk (frame, this, cookie, op_ret, op_errno,
- buf, preparent, postparent, NULL, NULL, xdata);
+ return ec_dir_write_cbk(frame, this, cookie, op_ret, op_errno, buf,
+ preparent, postparent, NULL, NULL, xdata);
}
-void ec_wind_symlink(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+void
+ec_wind_symlink(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
{
ec_trace("WIND", fop, "idx=%d", idx);
@@ -1349,15 +1198,16 @@ void ec_wind_symlink(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
fop->str[0], &fop->loc[0], fop->mode[0], fop->xdata);
}
-int32_t ec_manager_symlink(ec_fop_data_t * fop, int32_t state)
+int32_t
+ec_manager_symlink(ec_fop_data_t *fop, int32_t state)
{
- ec_cbk_data_t * cbk;
+ ec_cbk_data_t *cbk;
- switch (state)
- {
+ switch (state) {
case EC_STATE_INIT:
case EC_STATE_LOCK:
- ec_lock_prepare_entry(fop, &fop->loc[0], 1);
+ ec_lock_prepare_parent_inode(fop, &fop->loc[0], NULL,
+ EC_UPDATE_DATA | EC_UPDATE_META);
ec_lock(fop);
return EC_STATE_DISPATCH;
@@ -1368,34 +1218,15 @@ int32_t ec_manager_symlink(ec_fop_data_t * fop, int32_t state)
return EC_STATE_PREPARE_ANSWER;
case EC_STATE_PREPARE_ANSWER:
- cbk = fop->answer;
- if (cbk != NULL)
- {
- if (!ec_dict_combine(cbk, EC_COMBINE_XDATA))
- {
- if (cbk->op_ret >= 0)
- {
- cbk->op_ret = -1;
- cbk->op_errno = EIO;
- }
- }
- if (cbk->op_ret >= 0) {
- ec_iatt_rebuild(fop->xl->private, cbk->iatt, 3,
- cbk->count);
-
- if (ec_loc_update(fop->xl, &fop->loc[0], cbk->inode,
- &cbk->iatt[0]) != 0) {
- cbk->op_ret = -1;
- cbk->op_errno = EIO;
- }
- }
- if (cbk->op_ret < 0) {
- ec_fop_set_error(fop, cbk->op_errno);
- }
- }
- else
- {
- ec_fop_set_error(fop, EIO);
+ cbk = ec_fop_prepare_answer(fop, _gf_false);
+ if (cbk != NULL) {
+ int32_t err;
+
+ ec_iatt_rebuild(fop->xl->private, cbk->iatt, 3, cbk->count);
+
+ err = ec_loc_update(fop->xl, &fop->loc[0], cbk->inode,
+ &cbk->iatt[0]);
+ ec_cbk_set_error(cbk, -err, _gf_false);
}
return EC_STATE_REPORT;
@@ -1405,12 +1236,11 @@ int32_t ec_manager_symlink(ec_fop_data_t * fop, int32_t state)
GF_ASSERT(cbk != NULL);
- if (fop->cbks.symlink != NULL)
- {
- fop->cbks.symlink(fop->req_frame, fop, fop->xl, cbk->op_ret,
- cbk->op_errno, fop->loc[0].inode,
- &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2],
- cbk->xdata);
+ if (fop->cbks.symlink != NULL) {
+ QUORUM_CBK(fop->cbks.symlink, fop, fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno, fop->loc[0].inode,
+ &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2],
+ cbk->xdata);
}
return EC_STATE_LOCK_REUSE;
@@ -1422,8 +1252,7 @@ int32_t ec_manager_symlink(ec_fop_data_t * fop, int32_t state)
case -EC_STATE_REPORT:
GF_ASSERT(fop->error != 0);
- if (fop->cbks.symlink != NULL)
- {
+ if (fop->cbks.symlink != NULL) {
fop->cbks.symlink(fop->req_frame, fop, fop->xl, -1, fop->error,
NULL, NULL, NULL, NULL, NULL);
}
@@ -1443,65 +1272,60 @@ int32_t ec_manager_symlink(ec_fop_data_t * fop, int32_t state)
return EC_STATE_END;
default:
- gf_log(fop->xl->name, GF_LOG_ERROR, "Unhandled state %d for %s",
- state, ec_fop_name(fop->id));
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
return EC_STATE_END;
}
}
-void ec_symlink(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_symlink_cbk_t func, void * data,
- const char * linkname, loc_t * loc, mode_t umask,
- dict_t * xdata)
+void
+ec_symlink(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_symlink_cbk_t func, void *data,
+ const char *linkname, loc_t *loc, mode_t umask, dict_t *xdata)
{
- ec_cbk_t callback = { .symlink = func };
- ec_fop_data_t * fop = NULL;
- int32_t error = EIO;
+ ec_cbk_t callback = {.symlink = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
- gf_log("ec", GF_LOG_TRACE, "EC(SYMLINK) %p", frame);
+ gf_msg_trace("ec", 0, "EC(SYMLINK) %p", frame);
VALIDATE_OR_GOTO(this, out);
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_SYMLINK,
- EC_FLAG_UPDATE_LOC_PARENT, target, minimum,
- ec_wind_symlink, ec_manager_symlink, callback,
- data);
- if (fop == NULL)
- {
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_SYMLINK, 0, target,
+ fop_flags, ec_wind_symlink, ec_manager_symlink,
+ callback, data);
+ if (fop == NULL) {
goto out;
}
fop->mode[0] = umask;
- if (linkname != NULL)
- {
+ if (linkname != NULL) {
fop->str[0] = gf_strdup(linkname);
- if (fop->str[0] == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to duplicate a string.");
+ if (fop->str[0] == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY,
+ "Failed to duplicate a string.");
goto out;
}
}
- if (loc != NULL)
- {
- if (loc_copy(&fop->loc[0], loc) != 0)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to copy a location.");
+ if (loc != NULL) {
+ if (loc_copy(&fop->loc[0], loc) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL,
+ "Failed to copy a location.");
goto out;
}
}
- if (xdata != NULL)
- {
- fop->xdata = dict_ref(xdata);
- if (fop->xdata == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (xdata != NULL) {
+ fop->xdata = dict_copy_with_ref(xdata, NULL);
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
@@ -1510,28 +1334,26 @@ void ec_symlink(call_frame_t * frame, xlator_t * this, uintptr_t target,
error = 0;
out:
- if (fop != NULL)
- {
+ if (fop != NULL) {
ec_manager(fop, error);
- }
- else
- {
- func(frame, NULL, this, -1, EIO, NULL, NULL, NULL, NULL, NULL);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, NULL, NULL, NULL, NULL);
}
}
/* FOP: unlink */
-int32_t ec_unlink_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
- int32_t op_ret, int32_t op_errno,
- struct iatt * preparent, struct iatt * postparent,
- dict_t * xdata)
+int32_t
+ec_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- return ec_dir_write_cbk (frame, this, cookie, op_ret, op_errno, NULL,
- preparent, postparent, NULL, NULL, xdata);
+ return ec_dir_write_cbk(frame, this, cookie, op_ret, op_errno, NULL,
+ preparent, postparent, NULL, NULL, xdata);
}
-void ec_wind_unlink(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+void
+ec_wind_unlink(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
{
ec_trace("WIND", fop, "idx=%d", idx);
@@ -1540,22 +1362,18 @@ void ec_wind_unlink(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
&fop->loc[0], fop->int32, fop->xdata);
}
-int32_t ec_manager_unlink(ec_fop_data_t * fop, int32_t state)
+int32_t
+ec_manager_unlink(ec_fop_data_t *fop, int32_t state)
{
- ec_cbk_data_t * cbk;
+ ec_cbk_data_t *cbk;
- switch (state)
- {
+ switch (state) {
case EC_STATE_INIT:
case EC_STATE_LOCK:
- ec_lock_prepare_entry(fop, &fop->loc[0], 1);
+ ec_lock_prepare_parent_inode(fop, &fop->loc[0], NULL,
+ EC_UPDATE_DATA | EC_UPDATE_META);
ec_lock(fop);
- return EC_STATE_GET_SIZE_AND_VERSION;
-
- case EC_STATE_GET_SIZE_AND_VERSION:
- ec_get_size_version(fop);
-
return EC_STATE_DISPATCH;
case EC_STATE_DISPATCH:
@@ -1564,26 +1382,7 @@ int32_t ec_manager_unlink(ec_fop_data_t * fop, int32_t state)
return EC_STATE_PREPARE_ANSWER;
case EC_STATE_PREPARE_ANSWER:
- cbk = fop->answer;
- if (cbk != NULL)
- {
- if (!ec_dict_combine(cbk, EC_COMBINE_XDATA))
- {
- if (cbk->op_ret >= 0)
- {
- cbk->op_ret = -1;
- cbk->op_errno = EIO;
- }
- }
- if (cbk->op_ret < 0)
- {
- ec_fop_set_error(fop, cbk->op_errno);
- }
- }
- else
- {
- ec_fop_set_error(fop, EIO);
- }
+ ec_fop_prepare_answer(fop, _gf_false);
return EC_STATE_REPORT;
@@ -1592,25 +1391,22 @@ int32_t ec_manager_unlink(ec_fop_data_t * fop, int32_t state)
GF_ASSERT(cbk != NULL);
- if (fop->cbks.unlink != NULL)
- {
- fop->cbks.unlink(fop->req_frame, fop, fop->xl, cbk->op_ret,
- cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1],
- cbk->xdata);
+ if (fop->cbks.unlink != NULL) {
+ QUORUM_CBK(fop->cbks.unlink, fop, fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno, &cbk->iatt[0],
+ &cbk->iatt[1], cbk->xdata);
}
return EC_STATE_LOCK_REUSE;
case -EC_STATE_INIT:
case -EC_STATE_LOCK:
- case -EC_STATE_GET_SIZE_AND_VERSION:
case -EC_STATE_DISPATCH:
case -EC_STATE_PREPARE_ANSWER:
case -EC_STATE_REPORT:
GF_ASSERT(fop->error != 0);
- if (fop->cbks.unlink != NULL)
- {
+ if (fop->cbks.unlink != NULL) {
fop->cbks.unlink(fop->req_frame, fop, fop->xl, -1, fop->error,
NULL, NULL, NULL);
}
@@ -1630,54 +1426,51 @@ int32_t ec_manager_unlink(ec_fop_data_t * fop, int32_t state)
return EC_STATE_END;
default:
- gf_log(fop->xl->name, GF_LOG_ERROR, "Unhandled state %d for %s",
- state, ec_fop_name(fop->id));
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
return EC_STATE_END;
}
}
-void ec_unlink(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_unlink_cbk_t func, void * data,
- loc_t * loc, int xflags, dict_t * xdata)
+void
+ec_unlink(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_unlink_cbk_t func, void *data, loc_t *loc,
+ int xflags, dict_t *xdata)
{
- ec_cbk_t callback = { .unlink = func };
- ec_fop_data_t * fop = NULL;
- int32_t error = EIO;
+ ec_cbk_t callback = {.unlink = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
- gf_log("ec", GF_LOG_TRACE, "EC(UNLINK) %p", frame);
+ gf_msg_trace("ec", 0, "EC(UNLINK) %p", frame);
VALIDATE_OR_GOTO(this, out);
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_UNLINK,
- EC_FLAG_UPDATE_LOC_PARENT, target, minimum,
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_UNLINK, 0, target, fop_flags,
ec_wind_unlink, ec_manager_unlink, callback,
data);
- if (fop == NULL)
- {
+ if (fop == NULL) {
goto out;
}
fop->int32 = xflags;
- if (loc != NULL)
- {
- if (loc_copy(&fop->loc[0], loc) != 0)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to copy a location.");
+ if (loc != NULL) {
+ if (loc_copy(&fop->loc[0], loc) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL,
+ "Failed to copy a location.");
goto out;
}
}
- if (xdata != NULL)
- {
- fop->xdata = dict_ref(xdata);
- if (fop->xdata == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (xdata != NULL) {
+ fop->xdata = dict_copy_with_ref(xdata, NULL);
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
@@ -1686,12 +1479,9 @@ void ec_unlink(call_frame_t * frame, xlator_t * this, uintptr_t target,
error = 0;
out:
- if (fop != NULL)
- {
+ if (fop != NULL) {
ec_manager(fop, error);
- }
- else
- {
- func(frame, NULL, this, -1, EIO, NULL, NULL, NULL);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, NULL, NULL);
}
}
diff --git a/xlators/cluster/ec/src/ec-fops.h b/xlators/cluster/ec/src/ec-fops.h
index d6b9770f720..07edf8a7fec 100644
--- a/xlators/cluster/ec/src/ec-fops.h
+++ b/xlators/cluster/ec/src/ec-fops.h
@@ -11,191 +11,244 @@
#ifndef __EC_FOPS_H__
#define __EC_FOPS_H__
-#include "xlator.h"
+#include <glusterfs/xlator.h>
-#include "ec-data.h"
+#include "ec-types.h"
#include "ec-common.h"
-#define EC_FOP_HEAL -1
-#define EC_FOP_FHEAL -2
-
-void ec_access(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_access_cbk_t func, void *data, loc_t * loc,
- int32_t mask, dict_t * xdata);
-
-void ec_create(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_create_cbk_t func, void *data, loc_t * loc,
- int32_t flags, mode_t mode, mode_t umask, fd_t * fd,
- dict_t * xdata);
-
-void ec_entrylk(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_entrylk_cbk_t func, void *data,
- const char * volume, loc_t * loc, const char * basename,
- entrylk_cmd cmd, entrylk_type type, dict_t * xdata);
-
-void ec_fentrylk(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_fentrylk_cbk_t func, void *data,
- const char * volume, fd_t * fd, const char * basename,
- entrylk_cmd cmd, entrylk_type type, dict_t * xdata);
-
-void ec_flush(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_flush_cbk_t func, void *data, fd_t * fd,
- dict_t * xdata);
-
-void ec_fsync(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_fsync_cbk_t func, void *data, fd_t * fd,
- int32_t datasync, dict_t * xdata);
-
-void ec_fsyncdir(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_fsyncdir_cbk_t func, void *data,
- fd_t * fd, int32_t datasync, dict_t * xdata);
-
-void ec_getxattr(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_getxattr_cbk_t func, void *data,
- loc_t * loc, const char * name, dict_t * xdata);
-
-void ec_fgetxattr(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_fgetxattr_cbk_t func, void *data,
- fd_t * fd, const char * name, dict_t * xdata);
-
-void ec_heal(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_heal_cbk_t func, void *data, loc_t * loc,
- int32_t partial, dict_t *xdata);
-
-void ec_fheal(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_fheal_cbk_t func, void *data, fd_t * fd,
- int32_t partial, dict_t *xdata);
-
-void ec_inodelk(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_inodelk_cbk_t func, void *data,
- const char * volume, loc_t * loc, int32_t cmd,
- struct gf_flock * flock, dict_t * xdata);
-
-void ec_finodelk(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_finodelk_cbk_t func, void *data,
- const char * volume, fd_t * fd, int32_t cmd,
- struct gf_flock * flock, dict_t * xdata);
-
-void ec_link(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_link_cbk_t func, void *data, loc_t * oldloc,
- loc_t * newloc, dict_t * xdata);
-
-void ec_lk(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_lk_cbk_t func, void *data, fd_t * fd,
- int32_t cmd, struct gf_flock * flock, dict_t * xdata);
-
-void ec_lookup(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_lookup_cbk_t func, void *data, loc_t * loc,
- dict_t * xdata);
-
-void ec_mkdir(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_mkdir_cbk_t func, void *data, loc_t * loc,
- mode_t mode, mode_t umask, dict_t * xdata);
-
-void ec_mknod(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_mknod_cbk_t func, void *data, loc_t * loc,
- mode_t mode, dev_t rdev, mode_t umask, dict_t * xdata);
-
-void ec_open(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_open_cbk_t func, void *data, loc_t * loc,
- int32_t flags, fd_t * fd, dict_t * xdata);
-
-void ec_opendir(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_opendir_cbk_t func, void *data,
- loc_t * loc, fd_t * fd, dict_t * xdata);
-
-void ec_readdir(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_readdir_cbk_t func, void *data, fd_t * fd,
- size_t size, off_t offset, dict_t * xdata);
-
-void ec_readdirp(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_readdirp_cbk_t func, void *data,
- fd_t * fd, size_t size, off_t offset, dict_t * xdata);
-
-void ec_readlink(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_readlink_cbk_t func, void *data,
- loc_t * loc, size_t size, dict_t * xdata);
-
-void ec_readv(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_readv_cbk_t func, void *data, fd_t * fd,
- size_t size, off_t offset, uint32_t flags, dict_t * xdata);
-
-void ec_removexattr(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_removexattr_cbk_t func, void *data,
- loc_t * loc, const char * name, dict_t * xdata);
-
-void ec_fremovexattr(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_fremovexattr_cbk_t func, void *data,
- fd_t * fd, const char * name, dict_t * xdata);
-
-void ec_rename(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_rename_cbk_t func, void *data,
- loc_t * oldloc, loc_t * newloc, dict_t * xdata);
-
-void ec_rmdir(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_rmdir_cbk_t func, void *data, loc_t * loc,
- int xflags, dict_t * xdata);
-
-void ec_setattr(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_setattr_cbk_t func, void *data,
- loc_t * loc, struct iatt * stbuf, int32_t valid,
- dict_t * xdata);
-
-void ec_fsetattr(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_fsetattr_cbk_t func, void *data,
- fd_t * fd, struct iatt * stbuf, int32_t valid,
- dict_t * xdata);
-
-void ec_setxattr(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_setxattr_cbk_t func, void *data,
- loc_t * loc, dict_t * dict, int32_t flags, dict_t * xdata);
-
-void ec_fsetxattr(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_fsetxattr_cbk_t func, void *data,
- fd_t * fd, dict_t * dict, int32_t flags, dict_t * xdata);
-
-void ec_stat(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_stat_cbk_t func, void *data, loc_t * loc,
- dict_t * xdata);
-
-void ec_fstat(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_fstat_cbk_t func, void *data, fd_t * fd,
- dict_t * xdata);
-
-void ec_statfs(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_statfs_cbk_t func, void *data, loc_t * loc,
- dict_t * xdata);
-
-void ec_symlink(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_symlink_cbk_t func, void *data,
- const char * linkname, loc_t * loc, mode_t umask,
- dict_t * xdata);
-
-void ec_truncate(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_truncate_cbk_t func, void *data,
- loc_t * loc, off_t offset, dict_t * xdata);
-
-void ec_ftruncate(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_ftruncate_cbk_t func, void *data,
- fd_t * fd, off_t offset, dict_t * xdata);
-
-void ec_unlink(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_unlink_cbk_t func, void *data, loc_t * loc,
- int xflags, dict_t * xdata);
-
-void ec_writev(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_writev_cbk_t func, void *data, fd_t * fd,
- struct iovec * vector, int32_t count, off_t offset,
- uint32_t flags, struct iobref * iobref, dict_t * xdata);
-
-void ec_xattrop(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_xattrop_cbk_t func, void *data,
- loc_t * loc, gf_xattrop_flags_t optype, dict_t * xattr,
- dict_t * xdata);
-
-void ec_fxattrop(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_fxattrop_cbk_t func, void *data,
- fd_t * fd, gf_xattrop_flags_t optype, dict_t * xattr,
- dict_t * xdata);
+void
+ec_access(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_access_cbk_t func, void *data, loc_t *loc,
+ int32_t mask, dict_t *xdata);
+
+void
+ec_create(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_create_cbk_t func, void *data, loc_t *loc,
+ int32_t flags, mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata);
+
+void
+ec_entrylk(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_entrylk_cbk_t func, void *data,
+ const char *volume, loc_t *loc, const char *basename,
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata);
+
+void
+ec_fentrylk(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fentrylk_cbk_t func, void *data,
+ const char *volume, fd_t *fd, const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata);
+
+void
+ec_flush(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_flush_cbk_t func, void *data, fd_t *fd,
+ dict_t *xdata);
+
+void
+ec_fsync(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fsync_cbk_t func, void *data, fd_t *fd,
+ int32_t datasync, dict_t *xdata);
+
+void
+ec_fsyncdir(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fsyncdir_cbk_t func, void *data, fd_t *fd,
+ int32_t datasync, dict_t *xdata);
+
+void
+ec_getxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_getxattr_cbk_t func, void *data, loc_t *loc,
+ const char *name, dict_t *xdata);
+
+void
+ec_fgetxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fgetxattr_cbk_t func, void *data, fd_t *fd,
+ const char *name, dict_t *xdata);
+
+void
+ec_heal(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_heal_cbk_t func, void *data, loc_t *loc,
+ int32_t partial, dict_t *xdata);
+
+void
+ec_fheal(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fheal_cbk_t func, void *data, fd_t *fd,
+ int32_t partial, dict_t *xdata);
+
+void
+ec_inodelk(call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner,
+ uintptr_t target, uint32_t fop_flags, fop_inodelk_cbk_t func,
+ void *data, const char *volume, loc_t *loc, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata);
+
+void
+ec_finodelk(call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner,
+ uintptr_t target, uint32_t fop_flags, fop_finodelk_cbk_t func,
+ void *data, const char *volume, fd_t *fd, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata);
+
+void
+ec_link(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_link_cbk_t func, void *data, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata);
+
+void
+ec_lk(call_frame_t *frame, xlator_t *this, uintptr_t target, uint32_t fop_flags,
+ fop_lk_cbk_t func, void *data, fd_t *fd, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata);
+
+void
+ec_lookup(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_lookup_cbk_t func, void *data, loc_t *loc,
+ dict_t *xdata);
+
+void
+ec_mkdir(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_mkdir_cbk_t func, void *data, loc_t *loc,
+ mode_t mode, mode_t umask, dict_t *xdata);
+
+void
+ec_mknod(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_mknod_cbk_t func, void *data, loc_t *loc,
+ mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata);
+
+void
+ec_open(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_open_cbk_t func, void *data, loc_t *loc,
+ int32_t flags, fd_t *fd, dict_t *xdata);
+
+void
+ec_opendir(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_opendir_cbk_t func, void *data, loc_t *loc,
+ fd_t *fd, dict_t *xdata);
+
+void
+ec_readdir(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_readdir_cbk_t func, void *data, fd_t *fd,
+ size_t size, off_t offset, dict_t *xdata);
+
+void
+ec_readdirp(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_readdirp_cbk_t func, void *data, fd_t *fd,
+ size_t size, off_t offset, dict_t *xdata);
+
+void
+ec_readlink(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_readlink_cbk_t func, void *data, loc_t *loc,
+ size_t size, dict_t *xdata);
+
+void
+ec_readv(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_readv_cbk_t func, void *data, fd_t *fd,
+ size_t size, off_t offset, uint32_t flags, dict_t *xdata);
+
+void
+ec_removexattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_removexattr_cbk_t func, void *data,
+ loc_t *loc, const char *name, dict_t *xdata);
+
+void
+ec_fremovexattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fremovexattr_cbk_t func, void *data,
+ fd_t *fd, const char *name, dict_t *xdata);
+
+void
+ec_rename(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_rename_cbk_t func, void *data, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata);
+
+void
+ec_rmdir(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_rmdir_cbk_t func, void *data, loc_t *loc,
+ int xflags, dict_t *xdata);
+
+void
+ec_setattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_setattr_cbk_t func, void *data, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata);
+
+void
+ec_fsetattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fsetattr_cbk_t func, void *data, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata);
+
+void
+ec_setxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_setxattr_cbk_t func, void *data, loc_t *loc,
+ dict_t *dict, int32_t flags, dict_t *xdata);
+
+void
+ec_fsetxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fsetxattr_cbk_t func, void *data, fd_t *fd,
+ dict_t *dict, int32_t flags, dict_t *xdata);
+
+void
+ec_stat(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_stat_cbk_t func, void *data, loc_t *loc,
+ dict_t *xdata);
+
+void
+ec_fstat(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fstat_cbk_t func, void *data, fd_t *fd,
+ dict_t *xdata);
+
+void
+ec_statfs(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_statfs_cbk_t func, void *data, loc_t *loc,
+ dict_t *xdata);
+
+void
+ec_symlink(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_symlink_cbk_t func, void *data,
+ const char *linkname, loc_t *loc, mode_t umask, dict_t *xdata);
+
+void
+ec_fallocate(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fallocate_cbk_t func, void *data, fd_t *fd,
+ int32_t mode, off_t offset, size_t len, dict_t *xdata);
+
+void
+ec_discard(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_discard_cbk_t func, void *data, fd_t *fd,
+ off_t offset, size_t len, dict_t *xdata);
+
+void
+ec_truncate(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_truncate_cbk_t func, void *data, loc_t *loc,
+ off_t offset, dict_t *xdata);
+
+void
+ec_ftruncate(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_ftruncate_cbk_t func, void *data, fd_t *fd,
+ off_t offset, dict_t *xdata);
+
+void
+ec_unlink(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_unlink_cbk_t func, void *data, loc_t *loc,
+ int xflags, dict_t *xdata);
+
+void
+ec_writev(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_writev_cbk_t func, void *data, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t offset, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata);
+
+void
+ec_xattrop(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_xattrop_cbk_t func, void *data, loc_t *loc,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata);
+
+void
+ec_fxattrop(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fxattrop_cbk_t func, void *data, fd_t *fd,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata);
+
+void
+ec_seek(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_seek_cbk_t func, void *data, fd_t *fd,
+ off_t offset, gf_seek_what_t what, dict_t *xdata);
+
+void
+ec_ipc(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_ipc_cbk_t func, void *data, int32_t op,
+ dict_t *xdata);
#endif /* __EC_FOPS_H__ */
diff --git a/xlators/cluster/ec/src/ec-galois.c b/xlators/cluster/ec/src/ec-galois.c
new file mode 100644
index 00000000000..6e4990c71f5
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-galois.c
@@ -0,0 +1,183 @@
+/*
+ Copyright (c) 2015 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <string.h>
+
+#include "ec-mem-types.h"
+#include "ec-gf8.h"
+#include "ec-helpers.h"
+
+static ec_gf_t *
+ec_gf_alloc(uint32_t bits, uint32_t mod)
+{
+ ec_gf_t *gf;
+
+ gf = GF_MALLOC(sizeof(ec_gf_t), ec_mt_ec_gf_t);
+ if (gf == NULL) {
+ goto failed;
+ }
+
+ gf->bits = bits;
+ gf->size = 1 << bits;
+ gf->mod = mod;
+
+ gf->log = GF_MALLOC(sizeof(uint32_t) * (gf->size * 2 - 1),
+ gf_common_mt_int);
+ if (gf->log == NULL) {
+ goto failed_gf;
+ }
+ gf->pow = GF_MALLOC(sizeof(uint32_t) * (gf->size * 2 - 1),
+ gf_common_mt_int);
+ if (gf->pow == NULL) {
+ goto failed_log;
+ }
+
+ return gf;
+
+failed_log:
+ GF_FREE(gf->log);
+failed_gf:
+ GF_FREE(gf);
+failed:
+ return EC_ERR(ENOMEM);
+}
+
+static void
+ec_gf_init_tables(ec_gf_t *gf)
+{
+ uint32_t i, tmp;
+
+ memset(gf->log, -1, sizeof(uint32_t) * gf->size);
+
+ gf->pow[0] = 1;
+ gf->log[0] = gf->size;
+ gf->log[1] = 0;
+ for (i = 1; i < gf->size; i++) {
+ tmp = gf->pow[i - 1] << 1;
+ if (tmp >= gf->size) {
+ tmp ^= gf->mod;
+ }
+ gf->pow[i + gf->size - 1] = gf->pow[i] = tmp;
+ gf->log[tmp + gf->size - 1] = gf->log[tmp] = i;
+ }
+}
+
+ec_gf_t *
+ec_gf_prepare(uint32_t bits, uint32_t mod)
+{
+ ec_gf_mul_t **tbl;
+ ec_gf_t *gf;
+ uint32_t i, j;
+
+ if (bits != 8) {
+ return EC_ERR(EINVAL);
+ }
+
+ tbl = ec_gf8_mul;
+ if (mod == 0) {
+ mod = 0x11d;
+ }
+
+ gf = ec_gf_alloc(bits, mod);
+ if (EC_IS_ERR(gf)) {
+ return gf;
+ }
+ ec_gf_init_tables(gf);
+
+ gf->table = tbl;
+ gf->min_ops = bits * bits;
+ gf->max_ops = 0;
+ gf->avg_ops = 0;
+ for (i = 1; i < gf->size; i++) {
+ for (j = 0; tbl[i]->ops[j].op != EC_GF_OP_END; j++) {
+ }
+ if (gf->max_ops < j) {
+ gf->max_ops = j;
+ }
+ if (gf->min_ops > j) {
+ gf->min_ops = j;
+ }
+ gf->avg_ops += j;
+ }
+ gf->avg_ops /= gf->size;
+
+ return gf;
+}
+
+void
+ec_gf_destroy(ec_gf_t *gf)
+{
+ GF_FREE(gf->pow);
+ GF_FREE(gf->log);
+ GF_FREE(gf);
+}
+
+uint32_t
+ec_gf_add(ec_gf_t *gf, uint32_t a, uint32_t b)
+{
+ if ((a >= gf->size) || (b >= gf->size)) {
+ return gf->size;
+ }
+
+ return a ^ b;
+}
+
+uint32_t
+ec_gf_mul(ec_gf_t *gf, uint32_t a, uint32_t b)
+{
+ if ((a >= gf->size) || (b >= gf->size)) {
+ return gf->size;
+ }
+
+ if ((a != 0) && (b != 0)) {
+ return gf->pow[gf->log[a] + gf->log[b]];
+ }
+
+ return 0;
+}
+
+uint32_t
+ec_gf_div(ec_gf_t *gf, uint32_t a, uint32_t b)
+{
+ if ((a >= gf->size) || (b >= gf->size)) {
+ return gf->size;
+ }
+
+ if (b != 0) {
+ if (a != 0) {
+ return gf->pow[gf->size - 1 + gf->log[a] - gf->log[b]];
+ }
+
+ return 0;
+ }
+
+ return gf->size;
+}
+
+uint32_t
+ec_gf_exp(ec_gf_t *gf, uint32_t a, uint32_t b)
+{
+ uint32_t r;
+
+ if ((a >= gf->size) || ((a == 0) && (b == 0))) {
+ return gf->size;
+ }
+
+ r = 1;
+ while (b != 0) {
+ if ((b & 1) != 0) {
+ r = ec_gf_mul(gf, r, a);
+ }
+ a = ec_gf_mul(gf, a, a);
+ b >>= 1;
+ }
+
+ return r;
+}
diff --git a/xlators/cluster/ec/src/ec-galois.h b/xlators/cluster/ec/src/ec-galois.h
new file mode 100644
index 00000000000..ed55d53e419
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-galois.h
@@ -0,0 +1,32 @@
+/*
+ Copyright (c) 2015 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __EC_GALOIS_H__
+#define __EC_GALOIS_H__
+
+#include <inttypes.h>
+
+#include "ec-types.h"
+
+ec_gf_t *
+ec_gf_prepare(uint32_t bits, uint32_t mod);
+void
+ec_gf_destroy(ec_gf_t *gf);
+
+uint32_t
+ec_gf_add(ec_gf_t *gf, uint32_t a, uint32_t b);
+uint32_t
+ec_gf_mul(ec_gf_t *gf, uint32_t a, uint32_t b);
+uint32_t
+ec_gf_div(ec_gf_t *gf, uint32_t a, uint32_t b);
+uint32_t
+ec_gf_exp(ec_gf_t *gf, uint32_t a, uint32_t b);
+
+#endif /* __EC_GALOIS_H__ */
diff --git a/xlators/cluster/ec/src/ec-generic.c b/xlators/cluster/ec/src/ec-generic.c
index f80770a3365..884deb93669 100644
--- a/xlators/cluster/ec/src/ec-generic.c
+++ b/xlators/cluster/ec/src/ec-generic.c
@@ -8,24 +8,23 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/byte-order.h>
+#include "ec.h"
+#include "ec-messages.h"
#include "ec-helpers.h"
#include "ec-common.h"
#include "ec-combine.h"
-#include "ec-method.h"
#include "ec-fops.h"
-#include "byte-order.h"
-#define EC_SELFHEAL_BIT 62
/* FOP: flush */
-int32_t ec_flush_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
- int32_t op_ret, int32_t op_errno, dict_t * xdata)
+int32_t
+ec_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
{
- ec_fop_data_t * fop = NULL;
- ec_cbk_data_t * cbk = NULL;
+ ec_fop_data_t *fop = NULL;
+ ec_cbk_data_t *cbk = NULL;
int32_t idx = (int32_t)(uintptr_t)cookie;
VALIDATE_OR_GOTO(this, out);
@@ -35,20 +34,18 @@ int32_t ec_flush_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
fop = frame->local;
- ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx,
- frame, op_ret, op_errno);
+ ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame,
+ op_ret, op_errno);
cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_FLUSH, idx, op_ret,
op_errno);
- if (cbk != NULL)
- {
- if (xdata != NULL)
- {
+ if (cbk != NULL) {
+ if (xdata != NULL) {
cbk->xdata = dict_ref(xdata);
- if (cbk->xdata == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (cbk->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
@@ -58,15 +55,15 @@ int32_t ec_flush_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
}
out:
- if (fop != NULL)
- {
+ if (fop != NULL) {
ec_complete(fop);
}
return 0;
}
-void ec_wind_flush(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+void
+ec_wind_flush(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
{
ec_trace("WIND", fop, "idx=%d", idx);
@@ -75,15 +72,15 @@ void ec_wind_flush(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
fop->xdata);
}
-int32_t ec_manager_flush(ec_fop_data_t * fop, int32_t state)
+int32_t
+ec_manager_flush(ec_fop_data_t *fop, int32_t state)
{
- ec_cbk_data_t * cbk;
+ ec_cbk_data_t *cbk;
- switch (state)
- {
+ switch (state) {
case EC_STATE_INIT:
case EC_STATE_LOCK:
- ec_lock_prepare_fd(fop, fop->fd, 0);
+ ec_lock_prepare_fd(fop, fop->fd, 0, 0, EC_RANGE_FULL);
ec_lock(fop);
return EC_STATE_DISPATCH;
@@ -99,26 +96,7 @@ int32_t ec_manager_flush(ec_fop_data_t * fop, int32_t state)
return EC_STATE_PREPARE_ANSWER;
case EC_STATE_PREPARE_ANSWER:
- cbk = fop->answer;
- if (cbk != NULL)
- {
- if (!ec_dict_combine(cbk, EC_COMBINE_XDATA))
- {
- if (cbk->op_ret >= 0)
- {
- cbk->op_ret = -1;
- cbk->op_errno = EIO;
- }
- }
- if (cbk->op_ret < 0)
- {
- ec_fop_set_error(fop, cbk->op_errno);
- }
- }
- else
- {
- ec_fop_set_error(fop, EIO);
- }
+ ec_fop_prepare_answer(fop, _gf_false);
return EC_STATE_REPORT;
@@ -127,8 +105,7 @@ int32_t ec_manager_flush(ec_fop_data_t * fop, int32_t state)
GF_ASSERT(cbk != NULL);
- if (fop->cbks.flush != NULL)
- {
+ if (fop->cbks.flush != NULL) {
fop->cbks.flush(fop->req_frame, fop, fop->xl, cbk->op_ret,
cbk->op_errno, cbk->xdata);
}
@@ -137,13 +114,13 @@ int32_t ec_manager_flush(ec_fop_data_t * fop, int32_t state)
case -EC_STATE_INIT:
case -EC_STATE_LOCK:
+ case -EC_STATE_DELAYED_START:
case -EC_STATE_DISPATCH:
case -EC_STATE_PREPARE_ANSWER:
case -EC_STATE_REPORT:
GF_ASSERT(fop->error != 0);
- if (fop->cbks.flush != NULL)
- {
+ if (fop->cbks.flush != NULL) {
fop->cbks.flush(fop->req_frame, fop, fop->xl, -1, fop->error,
NULL);
}
@@ -163,55 +140,93 @@ int32_t ec_manager_flush(ec_fop_data_t * fop, int32_t state)
return EC_STATE_END;
default:
- gf_log(fop->xl->name, GF_LOG_ERROR, "Unhandled state %d for %s",
- state, ec_fop_name(fop->id));
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
return EC_STATE_END;
}
}
-void ec_flush(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_flush_cbk_t func, void * data, fd_t * fd,
- dict_t * xdata)
+static int32_t
+ec_validate_fd(fd_t *fd, xlator_t *xl)
+{
+ uint64_t iversion = 0;
+ uint64_t fversion = 0;
+ ec_inode_t *inode_ctx = NULL;
+ ec_fd_t *fd_ctx = NULL;
+
+ LOCK(&fd->lock);
+ {
+ fd_ctx = __ec_fd_get(fd, xl);
+ if (fd_ctx) {
+ fversion = fd_ctx->bad_version;
+ }
+ }
+ UNLOCK(&fd->lock);
+
+ LOCK(&fd->inode->lock);
+ {
+ inode_ctx = __ec_inode_get(fd->inode, xl);
+ if (inode_ctx) {
+ iversion = inode_ctx->bad_version;
+ }
+ }
+ UNLOCK(&fd->inode->lock);
+ if (fversion < iversion) {
+ return EBADF;
+ }
+ return 0;
+}
+
+void
+ec_flush(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_flush_cbk_t func, void *data, fd_t *fd,
+ dict_t *xdata)
{
- ec_cbk_t callback = { .flush = func };
- ec_fop_data_t * fop = NULL;
- int32_t error = EIO;
+ ec_cbk_t callback = {.flush = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
- gf_log("ec", GF_LOG_TRACE, "EC(FLUSH) %p", frame);
+ gf_msg_trace("ec", 0, "EC(FLUSH) %p", frame);
VALIDATE_OR_GOTO(this, out);
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_FLUSH, EC_FLAG_UPDATE_FD,
- target, minimum, ec_wind_flush,
- ec_manager_flush, callback, data);
- if (fop == NULL)
- {
+ if (fd) {
+ error = ec_validate_fd(fd, this);
+ if (error) {
+ gf_msg(this->name, GF_LOG_ERROR, EBADF, EC_MSG_FD_BAD,
+ "Failing %s on %s", gf_fop_list[GF_FOP_FLUSH],
+ fd->inode ? uuid_utoa(fd->inode->gfid) : "");
+ goto out;
+ }
+ }
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_FLUSH, 0, target, fop_flags,
+ ec_wind_flush, ec_manager_flush, callback, data);
+ if (fop == NULL) {
goto out;
}
fop->use_fd = 1;
- if (fd != NULL)
- {
+ if (fd != NULL) {
fop->fd = fd_ref(fd);
- if (fop->fd == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "file descriptor.");
+ if (fop->fd == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL,
+ "Failed to reference a "
+ "file descriptor.");
goto out;
}
}
- if (xdata != NULL)
- {
+ if (xdata != NULL) {
fop->xdata = dict_ref(xdata);
- if (fop->xdata == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
@@ -220,25 +235,22 @@ void ec_flush(call_frame_t * frame, xlator_t * this, uintptr_t target,
error = 0;
out:
- if (fop != NULL)
- {
+ if (fop != NULL) {
ec_manager(fop, error);
- }
- else
- {
- func(frame, NULL, this, -1, EIO, NULL);
+ } else {
+ func(frame, NULL, this, -1, error, NULL);
}
}
/* FOP: fsync */
-int32_t ec_combine_fsync(ec_fop_data_t * fop, ec_cbk_data_t * dst,
- ec_cbk_data_t * src)
+int32_t
+ec_combine_fsync(ec_fop_data_t *fop, ec_cbk_data_t *dst, ec_cbk_data_t *src)
{
- if (!ec_iatt_combine(dst->iatt, src->iatt, 2))
- {
- gf_log(fop->xl->name, GF_LOG_NOTICE, "Mismatching iatt in "
- "answers of 'GF_FOP_FSYNC'");
+ if (!ec_iatt_combine(fop, dst->iatt, src->iatt, 2)) {
+ gf_msg(fop->xl->name, GF_LOG_NOTICE, 0, EC_MSG_IATT_MISMATCH,
+ "Mismatching iatt in "
+ "answers of 'GF_FOP_FSYNC'");
return 0;
}
@@ -246,12 +258,13 @@ int32_t ec_combine_fsync(ec_fop_data_t * fop, ec_cbk_data_t * dst,
return 1;
}
-int32_t ec_fsync_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
- int32_t op_ret, int32_t op_errno, struct iatt * prebuf,
- struct iatt * postbuf, dict_t * xdata)
+int32_t
+ec_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
{
- ec_fop_data_t * fop = NULL;
- ec_cbk_data_t * cbk = NULL;
+ ec_fop_data_t *fop = NULL;
+ ec_cbk_data_t *cbk = NULL;
int32_t idx = (int32_t)(uintptr_t)cookie;
VALIDATE_OR_GOTO(this, out);
@@ -261,31 +274,26 @@ int32_t ec_fsync_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
fop = frame->local;
- ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx,
- frame, op_ret, op_errno);
+ ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame,
+ op_ret, op_errno);
cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_FSYNC, idx, op_ret,
op_errno);
- if (cbk != NULL)
- {
- if (op_ret >= 0)
- {
- if (prebuf != NULL)
- {
+ if (cbk != NULL) {
+ if (op_ret >= 0) {
+ if (prebuf != NULL) {
cbk->iatt[0] = *prebuf;
}
- if (postbuf != NULL)
- {
+ if (postbuf != NULL) {
cbk->iatt[1] = *postbuf;
}
}
- if (xdata != NULL)
- {
+ if (xdata != NULL) {
cbk->xdata = dict_ref(xdata);
- if (cbk->xdata == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (cbk->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
@@ -295,15 +303,15 @@ int32_t ec_fsync_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
}
out:
- if (fop != NULL)
- {
+ if (fop != NULL) {
ec_complete(fop);
}
return 0;
}
-void ec_wind_fsync(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+void
+ec_wind_fsync(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
{
ec_trace("WIND", fop, "idx=%d", idx);
@@ -312,22 +320,17 @@ void ec_wind_fsync(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
fop->int32, fop->xdata);
}
-int32_t ec_manager_fsync(ec_fop_data_t * fop, int32_t state)
+int32_t
+ec_manager_fsync(ec_fop_data_t *fop, int32_t state)
{
- ec_cbk_data_t * cbk;
+ ec_cbk_data_t *cbk;
- switch (state)
- {
+ switch (state) {
case EC_STATE_INIT:
case EC_STATE_LOCK:
- ec_lock_prepare_fd(fop, fop->fd, 0);
+ ec_lock_prepare_fd(fop, fop->fd, EC_QUERY_INFO, 0, EC_RANGE_FULL);
ec_lock(fop);
- return EC_STATE_GET_SIZE_AND_VERSION;
-
- case EC_STATE_GET_SIZE_AND_VERSION:
- ec_get_size_version(fop);
-
return EC_STATE_DISPATCH;
case EC_STATE_DISPATCH:
@@ -341,33 +344,14 @@ int32_t ec_manager_fsync(ec_fop_data_t * fop, int32_t state)
return EC_STATE_PREPARE_ANSWER;
case EC_STATE_PREPARE_ANSWER:
- cbk = fop->answer;
- if (cbk != NULL)
- {
- if (!ec_dict_combine(cbk, EC_COMBINE_XDATA))
- {
- if (cbk->op_ret >= 0)
- {
- cbk->op_ret = -1;
- cbk->op_errno = EIO;
- }
- }
- if (cbk->op_ret < 0)
- {
- ec_fop_set_error(fop, cbk->op_errno);
- }
- else
- {
- ec_iatt_rebuild(fop->xl->private, cbk->iatt, 2,
- cbk->count);
-
- cbk->iatt[0].ia_size = fop->pre_size;
- cbk->iatt[1].ia_size = fop->post_size;
- }
- }
- else
- {
- ec_fop_set_error(fop, EIO);
+ cbk = ec_fop_prepare_answer(fop, _gf_false);
+ if (cbk != NULL) {
+ ec_iatt_rebuild(fop->xl->private, cbk->iatt, 2, cbk->count);
+
+ /* This shouldn't fail because we have the inode locked. */
+ GF_ASSERT(ec_get_inode_size(fop, fop->fd->inode,
+ &cbk->iatt[0].ia_size));
+ cbk->iatt[1].ia_size = cbk->iatt[0].ia_size;
}
return EC_STATE_REPORT;
@@ -377,8 +361,7 @@ int32_t ec_manager_fsync(ec_fop_data_t * fop, int32_t state)
GF_ASSERT(cbk != NULL);
- if (fop->cbks.fsync != NULL)
- {
+ if (fop->cbks.fsync != NULL) {
fop->cbks.fsync(fop->req_frame, fop, fop->xl, cbk->op_ret,
cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1],
cbk->xdata);
@@ -388,14 +371,13 @@ int32_t ec_manager_fsync(ec_fop_data_t * fop, int32_t state)
case -EC_STATE_INIT:
case -EC_STATE_LOCK:
- case -EC_STATE_GET_SIZE_AND_VERSION:
case -EC_STATE_DISPATCH:
case -EC_STATE_PREPARE_ANSWER:
case -EC_STATE_REPORT:
+ case -EC_STATE_DELAYED_START:
GF_ASSERT(fop->error != 0);
- if (fop->cbks.fsync != NULL)
- {
+ if (fop->cbks.fsync != NULL) {
fop->cbks.fsync(fop->req_frame, fop, fop->xl, -1, fop->error,
NULL, NULL, NULL);
}
@@ -415,32 +397,41 @@ int32_t ec_manager_fsync(ec_fop_data_t * fop, int32_t state)
return EC_STATE_END;
default:
- gf_log(fop->xl->name, GF_LOG_ERROR, "Unhandled state %d for %s",
- state, ec_fop_name(fop->id));
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
return EC_STATE_END;
}
}
-void ec_fsync(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_fsync_cbk_t func, void * data, fd_t * fd,
- int32_t datasync, dict_t * xdata)
+void
+ec_fsync(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fsync_cbk_t func, void *data, fd_t *fd,
+ int32_t datasync, dict_t *xdata)
{
- ec_cbk_t callback = { .fsync = func };
- ec_fop_data_t * fop = NULL;
- int32_t error = EIO;
+ ec_cbk_t callback = {.fsync = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
- gf_log("ec", GF_LOG_TRACE, "EC(FSYNC) %p", frame);
+ gf_msg_trace("ec", 0, "EC(FSYNC) %p", frame);
VALIDATE_OR_GOTO(this, out);
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_FSYNC, EC_FLAG_UPDATE_FD,
- target, minimum, ec_wind_fsync,
- ec_manager_fsync, callback, data);
- if (fop == NULL)
- {
+ if (fd) {
+ error = ec_validate_fd(fd, this);
+ if (error) {
+ gf_msg(this->name, GF_LOG_ERROR, EBADF, EC_MSG_FD_BAD,
+ "Failing %s on %s", gf_fop_list[GF_FOP_FSYNC],
+ fd->inode ? uuid_utoa(fd->inode->gfid) : "");
+ goto out;
+ }
+ }
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_FSYNC, 0, target, fop_flags,
+ ec_wind_fsync, ec_manager_fsync, callback, data);
+ if (fop == NULL) {
goto out;
}
@@ -448,24 +439,22 @@ void ec_fsync(call_frame_t * frame, xlator_t * this, uintptr_t target,
fop->int32 = datasync;
- if (fd != NULL)
- {
+ if (fd != NULL) {
fop->fd = fd_ref(fd);
- if (fop->fd == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "file descriptor.");
+ if (fop->fd == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL,
+ "Failed to reference a "
+ "file descriptor.");
goto out;
}
}
- if (xdata != NULL)
- {
+ if (xdata != NULL) {
fop->xdata = dict_ref(xdata);
- if (fop->xdata == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
@@ -474,23 +463,21 @@ void ec_fsync(call_frame_t * frame, xlator_t * this, uintptr_t target,
error = 0;
out:
- if (fop != NULL)
- {
+ if (fop != NULL) {
ec_manager(fop, error);
- }
- else
- {
- func(frame, NULL, this, -1, EIO, NULL, NULL, NULL);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, NULL, NULL);
}
}
/* FOP: fsyncdir */
-int32_t ec_fsyncdir_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
- int32_t op_ret, int32_t op_errno, dict_t * xdata)
+int32_t
+ec_fsyncdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- ec_fop_data_t * fop = NULL;
- ec_cbk_data_t * cbk = NULL;
+ ec_fop_data_t *fop = NULL;
+ ec_cbk_data_t *cbk = NULL;
int32_t idx = (int32_t)(uintptr_t)cookie;
VALIDATE_OR_GOTO(this, out);
@@ -500,20 +487,18 @@ int32_t ec_fsyncdir_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
fop = frame->local;
- ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx,
- frame, op_ret, op_errno);
+ ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame,
+ op_ret, op_errno);
cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_FSYNCDIR, idx, op_ret,
op_errno);
- if (cbk != NULL)
- {
- if (xdata != NULL)
- {
+ if (cbk != NULL) {
+ if (xdata != NULL) {
cbk->xdata = dict_ref(xdata);
- if (cbk->xdata == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (cbk->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
@@ -523,15 +508,15 @@ int32_t ec_fsyncdir_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
}
out:
- if (fop != NULL)
- {
+ if (fop != NULL) {
ec_complete(fop);
}
return 0;
}
-void ec_wind_fsyncdir(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+void
+ec_wind_fsyncdir(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
{
ec_trace("WIND", fop, "idx=%d", idx);
@@ -540,15 +525,15 @@ void ec_wind_fsyncdir(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
fop->fd, fop->int32, fop->xdata);
}
-int32_t ec_manager_fsyncdir(ec_fop_data_t * fop, int32_t state)
+int32_t
+ec_manager_fsyncdir(ec_fop_data_t *fop, int32_t state)
{
- ec_cbk_data_t * cbk;
+ ec_cbk_data_t *cbk;
- switch (state)
- {
+ switch (state) {
case EC_STATE_INIT:
case EC_STATE_LOCK:
- ec_lock_prepare_fd(fop, fop->fd, 0);
+ ec_lock_prepare_fd(fop, fop->fd, 0, 0, EC_RANGE_FULL);
ec_lock(fop);
return EC_STATE_DISPATCH;
@@ -564,26 +549,7 @@ int32_t ec_manager_fsyncdir(ec_fop_data_t * fop, int32_t state)
return EC_STATE_PREPARE_ANSWER;
case EC_STATE_PREPARE_ANSWER:
- cbk = fop->answer;
- if (cbk != NULL)
- {
- if (!ec_dict_combine(cbk, EC_COMBINE_XDATA))
- {
- if (cbk->op_ret >= 0)
- {
- cbk->op_ret = -1;
- cbk->op_errno = EIO;
- }
- }
- if (cbk->op_ret < 0)
- {
- ec_fop_set_error(fop, cbk->op_errno);
- }
- }
- else
- {
- ec_fop_set_error(fop, EIO);
- }
+ ec_fop_prepare_answer(fop, _gf_false);
return EC_STATE_REPORT;
@@ -592,8 +558,7 @@ int32_t ec_manager_fsyncdir(ec_fop_data_t * fop, int32_t state)
GF_ASSERT(cbk != NULL);
- if (fop->cbks.fsyncdir != NULL)
- {
+ if (fop->cbks.fsyncdir != NULL) {
fop->cbks.fsyncdir(fop->req_frame, fop, fop->xl, cbk->op_ret,
cbk->op_errno, cbk->xdata);
}
@@ -605,12 +570,12 @@ int32_t ec_manager_fsyncdir(ec_fop_data_t * fop, int32_t state)
case -EC_STATE_DISPATCH:
case -EC_STATE_PREPARE_ANSWER:
case -EC_STATE_REPORT:
+ case -EC_STATE_DELAYED_START:
GF_ASSERT(fop->error != 0);
- if (fop->cbks.fsyncdir != NULL)
- {
- fop->cbks.fsyncdir(fop->req_frame, fop, fop->xl, -1,
- fop->error, NULL);
+ if (fop->cbks.fsyncdir != NULL) {
+ fop->cbks.fsyncdir(fop->req_frame, fop, fop->xl, -1, fop->error,
+ NULL);
}
return EC_STATE_LOCK_REUSE;
@@ -628,32 +593,32 @@ int32_t ec_manager_fsyncdir(ec_fop_data_t * fop, int32_t state)
return EC_STATE_END;
default:
- gf_log(fop->xl->name, GF_LOG_ERROR, "Unhandled state %d for %s",
- state, ec_fop_name(fop->id));
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
return EC_STATE_END;
}
}
-void ec_fsyncdir(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_fsyncdir_cbk_t func, void * data,
- fd_t * fd, int32_t datasync, dict_t * xdata)
+void
+ec_fsyncdir(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fsyncdir_cbk_t func, void *data, fd_t *fd,
+ int32_t datasync, dict_t *xdata)
{
- ec_cbk_t callback = { .fsyncdir = func };
- ec_fop_data_t * fop = NULL;
- int32_t error = EIO;
+ ec_cbk_t callback = {.fsyncdir = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
- gf_log("ec", GF_LOG_TRACE, "EC(FSYNCDIR) %p", frame);
+ gf_msg_trace("ec", 0, "EC(FSYNCDIR) %p", frame);
VALIDATE_OR_GOTO(this, out);
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_FSYNCDIR, EC_FLAG_UPDATE_FD,
- target, minimum, ec_wind_fsyncdir,
- ec_manager_fsyncdir, callback, data);
- if (fop == NULL)
- {
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_FSYNCDIR, 0, target,
+ fop_flags, ec_wind_fsyncdir, ec_manager_fsyncdir,
+ callback, data);
+ if (fop == NULL) {
goto out;
}
@@ -661,24 +626,22 @@ void ec_fsyncdir(call_frame_t * frame, xlator_t * this, uintptr_t target,
fop->int32 = datasync;
- if (fd != NULL)
- {
+ if (fd != NULL) {
fop->fd = fd_ref(fd);
- if (fop->fd == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "file descriptor.");
+ if (fop->fd == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL,
+ "Failed to reference a "
+ "file descriptor.");
goto out;
}
}
- if (xdata != NULL)
- {
+ if (xdata != NULL) {
fop->xdata = dict_ref(xdata);
- if (fop->xdata == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
@@ -687,139 +650,66 @@ void ec_fsyncdir(call_frame_t * frame, xlator_t * this, uintptr_t target,
error = 0;
out:
- if (fop != NULL)
- {
+ if (fop != NULL) {
ec_manager(fop, error);
- }
- else
- {
- func(frame, NULL, this, -1, EIO, NULL);
+ } else {
+ func(frame, NULL, this, -1, error, NULL);
}
}
/* FOP: lookup */
-void ec_lookup_rebuild(ec_t * ec, ec_fop_data_t * fop, ec_cbk_data_t * cbk)
+void
+ec_lookup_rebuild(ec_t *ec, ec_fop_data_t *fop, ec_cbk_data_t *cbk)
{
- ec_cbk_data_t * ans = NULL;
- ec_inode_t * ctx = NULL;
- ec_lock_t * lock = NULL;
- data_t * data = NULL;
- uint8_t * buff = NULL;
+ ec_inode_t *ctx = NULL;
uint64_t size = 0;
- int32_t i = 0, have_size = 0;
+ int32_t have_size = 0, err;
- if (cbk->op_ret < 0)
- {
+ if (cbk->op_ret < 0) {
return;
}
- ec_dict_del_number(cbk->xdata, EC_XATTR_VERSION, &cbk->version);
-
- if (ec_loc_update(fop->xl, &fop->loc[0], cbk->inode, &cbk->iatt[0]) != 0) {
- cbk->op_ret = -1;
- cbk->op_errno = EIO;
+ ec_dict_del_array(cbk->xdata, EC_XATTR_VERSION, cbk->version,
+ EC_VERSION_SIZE);
+ err = ec_loc_update(fop->xl, &fop->loc[0], cbk->inode, &cbk->iatt[0]);
+ if (ec_cbk_set_error(cbk, -err, _gf_true)) {
return;
}
LOCK(&cbk->inode->lock);
ctx = __ec_inode_get(cbk->inode, fop->xl);
- if ((ctx != NULL) && (ctx->inode_lock != NULL))
- {
- lock = ctx->inode_lock;
- cbk->version = lock->version;
- if (lock->have_size)
- {
- size = lock->size;
+ if (ctx != NULL) {
+ if (ctx->have_version) {
+ cbk->version[0] = ctx->post_version[0];
+ cbk->version[1] = ctx->post_version[1];
+ }
+ if (ctx->have_size) {
+ size = ctx->post_size;
have_size = 1;
}
}
UNLOCK(&cbk->inode->lock);
- if (cbk->iatt[0].ia_type == IA_IFREG)
- {
- uint8_t * blocks[cbk->count];
- uint32_t values[cbk->count];
-
+ if (cbk->iatt[0].ia_type == IA_IFREG) {
cbk->size = cbk->iatt[0].ia_size;
ec_dict_del_number(cbk->xdata, EC_XATTR_SIZE, &cbk->iatt[0].ia_size);
- if (have_size)
- {
+ if (have_size) {
cbk->iatt[0].ia_size = size;
}
-
- size = UINT64_MAX;
- for (i = 0, ans = cbk; (ans != NULL) && (i < ec->fragments);
- ans = ans->next)
- {
- if (!ans->dirty) {
- data = dict_get(ans->xdata, GF_CONTENT_KEY);
- if (data != NULL)
- {
- values[i] = ans->idx;
- blocks[i] = (uint8_t *)data->data;
- if (size > data->len) {
- size = data->len;
- }
- i++;
- }
- }
- }
-
- if (i >= ec->fragments)
- {
- size -= size % ec->fragment_size;
- if (size > 0)
- {
- buff = GF_MALLOC(size * ec->fragments, gf_common_mt_char);
- if (buff != NULL)
- {
- size = ec_method_decode(size, ec->fragments, values,
- blocks, buff);
- if (size > fop->size)
- {
- size = fop->size;
- }
- if (size > cbk->iatt[0].ia_size)
- {
- size = cbk->iatt[0].ia_size;
- }
-
- if (dict_set_bin(cbk->xdata, GF_CONTENT_KEY, buff,
- size) != 0)
- {
- GF_FREE(buff);
- buff = NULL;
- gf_log(fop->xl->name, GF_LOG_WARNING, "Lookup "
- "read-ahead "
- "failed");
- }
- }
- else
- {
- gf_log(fop->xl->name, GF_LOG_WARNING, "Lookup read-ahead "
- "failed");
- }
- }
- }
-
- if (buff == NULL)
- {
- dict_del(cbk->xdata, GF_CONTENT_KEY);
- }
}
}
-int32_t ec_combine_lookup(ec_fop_data_t * fop, ec_cbk_data_t * dst,
- ec_cbk_data_t * src)
+int32_t
+ec_combine_lookup(ec_fop_data_t *fop, ec_cbk_data_t *dst, ec_cbk_data_t *src)
{
- if (!ec_iatt_combine(dst->iatt, src->iatt, 2))
- {
- gf_log(fop->xl->name, GF_LOG_NOTICE, "Mismatching iatt in "
- "answers of 'GF_FOP_LOOKUP'");
+ if (!ec_iatt_combine(fop, dst->iatt, src->iatt, 2)) {
+ gf_msg(fop->xl->name, GF_LOG_DEBUG, 0, EC_MSG_IATT_MISMATCH,
+ "Mismatching iatt in "
+ "answers of 'GF_FOP_LOOKUP'");
return 0;
}
@@ -827,14 +717,15 @@ int32_t ec_combine_lookup(ec_fop_data_t * fop, ec_cbk_data_t * dst,
return 1;
}
-int32_t ec_lookup_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
- int32_t op_ret, int32_t op_errno, inode_t * inode,
- struct iatt * buf, dict_t * xdata,
- struct iatt * postparent)
+int32_t
+ec_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent)
{
- ec_fop_data_t * fop = NULL;
- ec_cbk_data_t * cbk = NULL;
+ ec_fop_data_t *fop = NULL;
+ ec_cbk_data_t *cbk = NULL;
int32_t idx = (int32_t)(uintptr_t)cookie;
+ uint64_t dirty[2] = {0};
VALIDATE_OR_GOTO(this, out);
GF_VALIDATE_OR_GOTO(this->name, frame, out);
@@ -843,65 +734,54 @@ int32_t ec_lookup_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
fop = frame->local;
- ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx,
- frame, op_ret, op_errno);
+ ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame,
+ op_ret, op_errno);
cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_LOOKUP, idx, op_ret,
op_errno);
- if (cbk != NULL)
- {
- if (op_ret >= 0)
- {
- if (inode != NULL)
- {
+ if (cbk != NULL) {
+ if (op_ret >= 0) {
+ if (inode != NULL) {
cbk->inode = inode_ref(inode);
- if (cbk->inode == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR,
+ if (cbk->inode == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_INODE_REF_FAIL,
"Failed to reference an inode.");
goto out;
}
}
- if (buf != NULL)
- {
+ if (buf != NULL) {
cbk->iatt[0] = *buf;
}
- if (postparent != NULL)
- {
+ if (postparent != NULL) {
cbk->iatt[1] = *postparent;
}
}
- if (xdata != NULL)
- {
- uint64_t dirty;
-
+ if (xdata != NULL) {
cbk->xdata = dict_ref(xdata);
- if (cbk->xdata == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (cbk->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
- if (ec_dict_del_number(cbk->xdata, EC_XATTR_DIRTY, &dirty) == 0) {
- cbk->dirty = dirty != 0;
- }
+ ec_dict_del_array(xdata, EC_XATTR_DIRTY, dirty, EC_VERSION_SIZE);
}
ec_combine(cbk, ec_combine_lookup);
}
out:
- if (fop != NULL)
- {
+ if (fop != NULL) {
ec_complete(fop);
}
return 0;
}
-void ec_wind_lookup(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+void
+ec_wind_lookup(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
{
ec_trace("WIND", fop, "idx=%d", idx);
@@ -910,54 +790,49 @@ void ec_wind_lookup(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
&fop->loc[0], fop->xdata);
}
-int32_t ec_manager_lookup(ec_fop_data_t * fop, int32_t state)
+int32_t
+ec_manager_lookup(ec_fop_data_t *fop, int32_t state)
{
- ec_cbk_data_t * cbk;
+ ec_cbk_data_t *cbk;
+ int32_t err;
- switch (state)
- {
+ switch (state) {
case EC_STATE_INIT:
- if (fop->xdata == NULL)
- {
+ if (fop->xdata == NULL) {
fop->xdata = dict_new();
- if (fop->xdata == NULL)
- {
- gf_log(fop->xl->name, GF_LOG_ERROR, "Unable to prepare "
- "lookup request");
+ if (fop->xdata == NULL) {
+ gf_msg(fop->xl->name, GF_LOG_ERROR, ENOMEM,
+ EC_MSG_LOOKUP_REQ_PREP_FAIL,
+ "Unable to prepare "
+ "lookup request");
- fop->error = EIO;
+ fop->error = ENOMEM;
return EC_STATE_REPORT;
}
+ } else {
+ /*TODO: To be handled once we have 'syndromes' */
+ dict_del(fop->xdata, GF_CONTENT_KEY);
}
- else
- {
- uint64_t size;
-
- if (dict_get_uint64(fop->xdata, GF_CONTENT_KEY, &size) == 0)
- {
- fop->size = size;
- size = ec_adjust_size(fop->xl->private, size, 1);
- if (dict_set_uint64(fop->xdata, GF_CONTENT_KEY, size) != 0)
- {
- gf_log("ec", GF_LOG_DEBUG, "Unable to update lookup "
- "content size");
- }
- }
+ err = dict_set_uint64(fop->xdata, EC_XATTR_SIZE, 0);
+ if (err == 0) {
+ err = dict_set_uint64(fop->xdata, EC_XATTR_VERSION, 0);
}
- if ((dict_set_uint64(fop->xdata, EC_XATTR_SIZE, 0) != 0) ||
- (dict_set_uint64(fop->xdata, EC_XATTR_VERSION, 0) != 0) ||
- (dict_set_uint64(fop->xdata, EC_XATTR_DIRTY, 0) != 0))
- {
- gf_log(fop->xl->name, GF_LOG_ERROR, "Unable to prepare lookup "
- "request");
+ if (err == 0) {
+ err = dict_set_uint64(fop->xdata, EC_XATTR_DIRTY, 0);
+ }
+ if (err != 0) {
+ gf_msg(fop->xl->name, GF_LOG_ERROR, -err,
+ EC_MSG_LOOKUP_REQ_PREP_FAIL,
+ "Unable to prepare lookup "
+ "request");
- fop->error = EIO;
+ fop->error = -err;
return EC_STATE_REPORT;
}
- /* Fall through */
+ /* Fall through */
case EC_STATE_DISPATCH:
ec_dispatch_all(fop);
@@ -965,32 +840,23 @@ int32_t ec_manager_lookup(ec_fop_data_t * fop, int32_t state)
return EC_STATE_PREPARE_ANSWER;
case EC_STATE_PREPARE_ANSWER:
- cbk = fop->answer;
- if (cbk != NULL)
- {
- if (!ec_dict_combine(cbk, EC_COMBINE_XDATA))
- {
- if (cbk->op_ret >= 0)
- {
- cbk->op_ret = -1;
- cbk->op_errno = EIO;
- }
- }
- if (cbk->op_ret < 0)
- {
- ec_fop_set_error(fop, cbk->op_errno);
- }
- else
- {
- ec_iatt_rebuild(fop->xl->private, cbk->iatt, 2,
- cbk->count);
-
- ec_lookup_rebuild(fop->xl->private, fop, cbk);
- }
+ /*
+ * Lookup happens without any lock, so there is a chance that it
+ * will have answers before modification happened and after
+ * modification happened in the same response. So choose the next
+ * best answer when the answers don't match for EC_MINIMUM_MIN
+ */
+
+ if (!fop->answer && !list_empty(&fop->cbk_list)) {
+ fop->answer = list_entry(fop->cbk_list.next, ec_cbk_data_t,
+ list);
}
- else
- {
- ec_fop_set_error(fop, EIO);
+
+ cbk = ec_fop_prepare_answer(fop, _gf_true);
+ if (cbk != NULL) {
+ ec_iatt_rebuild(fop->xl->private, cbk->iatt, 2, cbk->count);
+
+ ec_lookup_rebuild(fop->xl->private, fop, cbk);
}
return EC_STATE_REPORT;
@@ -1000,8 +866,7 @@ int32_t ec_manager_lookup(ec_fop_data_t * fop, int32_t state)
GF_ASSERT(cbk != NULL);
- if (fop->cbks.lookup != NULL)
- {
+ if (fop->cbks.lookup != NULL) {
fop->cbks.lookup(fop->req_frame, fop, fop->xl, cbk->op_ret,
cbk->op_errno, cbk->inode, &cbk->iatt[0],
cbk->xdata, &cbk->iatt[1]);
@@ -1015,8 +880,7 @@ int32_t ec_manager_lookup(ec_fop_data_t * fop, int32_t state)
case -EC_STATE_REPORT:
GF_ASSERT(fop->error != 0);
- if (fop->cbks.lookup != NULL)
- {
+ if (fop->cbks.lookup != NULL) {
fop->cbks.lookup(fop->req_frame, fop, fop->xl, -1, fop->error,
NULL, NULL, NULL, NULL);
}
@@ -1024,85 +888,77 @@ int32_t ec_manager_lookup(ec_fop_data_t * fop, int32_t state)
return EC_STATE_END;
default:
- gf_log(fop->xl->name, GF_LOG_ERROR, "Unhandled state %d for %s",
- state, ec_fop_name(fop->id));
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
return EC_STATE_END;
}
}
-void ec_lookup(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_lookup_cbk_t func, void * data,
- loc_t * loc, dict_t * xdata)
+void
+ec_lookup(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_lookup_cbk_t func, void *data, loc_t *loc,
+ dict_t *xdata)
{
- ec_cbk_t callback = { .lookup = func };
- ec_fop_data_t * fop = NULL;
- int32_t error = EIO;
+ ec_cbk_t callback = {.lookup = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
- gf_log("ec", GF_LOG_TRACE, "EC(LOOKUP) %p", frame);
+ gf_msg_trace("ec", 0, "EC(LOOKUP) %p", frame);
VALIDATE_OR_GOTO(this, out);
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_LOOKUP, 0, target, minimum,
- ec_wind_lookup, ec_manager_lookup, callback,
- data);
- if (fop == NULL)
- {
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_LOOKUP, EC_FLAG_LOCK_SHARED,
+ target, fop_flags, ec_wind_lookup,
+ ec_manager_lookup, callback, data);
+ if (fop == NULL) {
goto out;
}
- if (loc != NULL)
- {
- if (loc_copy(&fop->loc[0], loc) != 0)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to copy a location.");
+ if (loc != NULL) {
+ if (loc_copy(&fop->loc[0], loc) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL,
+ "Failed to copy a location.");
goto out;
}
}
- if (xdata != NULL)
- {
- fop->xdata = dict_ref(xdata);
+ if (xdata != NULL) {
+ fop->xdata = dict_copy_with_ref(xdata, NULL);
+ /* Do not log failures here as a memory problem would have already
+ * been logged by the corresponding alloc functions */
if (fop->xdata == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
-
goto out;
- }
}
error = 0;
out:
- if (fop != NULL)
- {
+ if (fop != NULL) {
ec_manager(fop, error);
- }
- else
- {
- func(frame, NULL, this, -1, EIO, NULL, NULL, NULL, NULL);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, NULL, NULL, NULL);
}
}
/* FOP: statfs */
-int32_t ec_combine_statfs(ec_fop_data_t * fop, ec_cbk_data_t * dst,
- ec_cbk_data_t * src)
+int32_t
+ec_combine_statfs(ec_fop_data_t *fop, ec_cbk_data_t *dst, ec_cbk_data_t *src)
{
ec_statvfs_combine(&dst->statvfs, &src->statvfs);
return 1;
}
-int32_t ec_statfs_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
- int32_t op_ret, int32_t op_errno, struct statvfs * buf,
- dict_t * xdata)
+int32_t
+ec_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct statvfs *buf, dict_t *xdata)
{
- ec_fop_data_t * fop = NULL;
- ec_cbk_data_t * cbk = NULL;
+ ec_fop_data_t *fop = NULL;
+ ec_cbk_data_t *cbk = NULL;
int32_t idx = (int32_t)(uintptr_t)cookie;
VALIDATE_OR_GOTO(this, out);
@@ -1112,27 +968,23 @@ int32_t ec_statfs_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
fop = frame->local;
- ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx,
- frame, op_ret, op_errno);
+ ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame,
+ op_ret, op_errno);
cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_STATFS, idx, op_ret,
op_errno);
- if (cbk != NULL)
- {
- if (op_ret >= 0)
- {
- if (buf != NULL)
- {
+ if (cbk != NULL) {
+ if (op_ret >= 0) {
+ if (buf != NULL) {
cbk->statvfs = *buf;
}
}
- if (xdata != NULL)
- {
+ if (xdata != NULL) {
cbk->xdata = dict_ref(xdata);
- if (cbk->xdata == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (cbk->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
@@ -1142,15 +994,15 @@ int32_t ec_statfs_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
}
out:
- if (fop != NULL)
- {
+ if (fop != NULL) {
ec_complete(fop);
}
return 0;
}
-void ec_wind_statfs(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+void
+ec_wind_statfs(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
{
ec_trace("WIND", fop, "idx=%d", idx);
@@ -1159,12 +1011,14 @@ void ec_wind_statfs(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
&fop->loc[0], fop->xdata);
}
-int32_t ec_manager_statfs(ec_fop_data_t * fop, int32_t state)
+int32_t
+ec_manager_statfs(ec_fop_data_t *fop, int32_t state)
{
- ec_cbk_data_t * cbk;
+ ec_cbk_data_t *cbk = NULL;
+ gf_boolean_t deem_statfs_enabled = _gf_false;
+ int32_t err = 0;
- switch (state)
- {
+ switch (state) {
case EC_STATE_INIT:
case EC_STATE_DISPATCH:
ec_dispatch_all(fop);
@@ -1172,34 +1026,24 @@ int32_t ec_manager_statfs(ec_fop_data_t * fop, int32_t state)
return EC_STATE_PREPARE_ANSWER;
case EC_STATE_PREPARE_ANSWER:
- cbk = fop->answer;
- if (cbk != NULL)
- {
- if (!ec_dict_combine(cbk, EC_COMBINE_XDATA))
- {
- if (cbk->op_ret >= 0)
- {
- cbk->op_ret = -1;
- cbk->op_errno = EIO;
+ cbk = ec_fop_prepare_answer(fop, _gf_true);
+ if (cbk != NULL) {
+ ec_t *ec = fop->xl->private;
+
+ if (cbk->xdata) {
+ err = dict_get_int8(cbk->xdata, "quota-deem-statfs",
+ (int8_t *)&deem_statfs_enabled);
+ if (err != -ENOENT) {
+ ec_cbk_set_error(cbk, -err, _gf_true);
}
}
- if (cbk->op_ret < 0)
- {
- ec_fop_set_error(fop, cbk->op_errno);
- }
- else
- {
- ec_t * ec = fop->xl->private;
+ if (err != 0 || deem_statfs_enabled == _gf_false) {
cbk->statvfs.f_blocks *= ec->fragments;
cbk->statvfs.f_bfree *= ec->fragments;
cbk->statvfs.f_bavail *= ec->fragments;
}
}
- else
- {
- ec_fop_set_error(fop, EIO);
- }
return EC_STATE_REPORT;
@@ -1208,8 +1052,7 @@ int32_t ec_manager_statfs(ec_fop_data_t * fop, int32_t state)
GF_ASSERT(cbk != NULL);
- if (fop->cbks.statfs != NULL)
- {
+ if (fop->cbks.statfs != NULL) {
fop->cbks.statfs(fop->req_frame, fop, fop->xl, cbk->op_ret,
cbk->op_errno, &cbk->statvfs, cbk->xdata);
}
@@ -1222,8 +1065,7 @@ int32_t ec_manager_statfs(ec_fop_data_t * fop, int32_t state)
case -EC_STATE_REPORT:
GF_ASSERT(fop->error != 0);
- if (fop->cbks.statfs != NULL)
- {
+ if (fop->cbks.statfs != NULL) {
fop->cbks.statfs(fop->req_frame, fop, fop->xl, -1, fop->error,
NULL, NULL);
}
@@ -1231,51 +1073,49 @@ int32_t ec_manager_statfs(ec_fop_data_t * fop, int32_t state)
return EC_STATE_END;
default:
- gf_log(fop->xl->name, GF_LOG_ERROR, "Unhandled state %d for %s",
- state, ec_fop_name(fop->id));
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
return EC_STATE_END;
}
}
-void ec_statfs(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_statfs_cbk_t func, void * data,
- loc_t * loc, dict_t * xdata)
+void
+ec_statfs(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_statfs_cbk_t func, void *data, loc_t *loc,
+ dict_t *xdata)
{
- ec_cbk_t callback = { .statfs = func };
- ec_fop_data_t * fop = NULL;
- int32_t error = EIO;
+ ec_cbk_t callback = {.statfs = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
- gf_log("ec", GF_LOG_TRACE, "EC(STATFS) %p", frame);
+ gf_msg_trace("ec", 0, "EC(STATFS) %p", frame);
VALIDATE_OR_GOTO(this, out);
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_STATFS, 0, target, minimum,
- ec_wind_statfs, ec_manager_statfs, callback,
- data);
- if (fop == NULL)
- {
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_STATFS, EC_FLAG_LOCK_SHARED,
+ target, fop_flags, ec_wind_statfs,
+ ec_manager_statfs, callback, data);
+ if (fop == NULL) {
goto out;
}
- if (loc != NULL)
- {
- if (loc_copy(&fop->loc[0], loc) != 0)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to copy a location.");
+ if (loc != NULL) {
+ if (loc_copy(&fop->loc[0], loc) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL,
+ "Failed to copy a location.");
goto out;
}
}
- if (xdata != NULL)
- {
+ if (xdata != NULL) {
fop->xdata = dict_ref(xdata);
- if (fop->xdata == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
@@ -1284,25 +1124,22 @@ void ec_statfs(call_frame_t * frame, xlator_t * this, uintptr_t target,
error = 0;
out:
- if (fop != NULL)
- {
+ if (fop != NULL) {
ec_manager(fop, error);
- }
- else
- {
- func(frame, NULL, this, -1, EIO, NULL, NULL);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, NULL);
}
}
/* FOP: xattrop */
-int32_t ec_combine_xattrop(ec_fop_data_t * fop, ec_cbk_data_t * dst,
- ec_cbk_data_t * src)
+int32_t
+ec_combine_xattrop(ec_fop_data_t *fop, ec_cbk_data_t *dst, ec_cbk_data_t *src)
{
- if (!ec_dict_compare(dst->dict, src->dict))
- {
- gf_log(fop->xl->name, GF_LOG_NOTICE, "Mismatching dictionary in "
- "answers of 'GF_FOP_XATTROP'");
+ if (!ec_dict_compare(dst->dict, src->dict)) {
+ gf_msg(fop->xl->name, GF_LOG_DEBUG, 0, EC_MSG_DICT_MISMATCH,
+ "Mismatching dictionary in "
+ "answers of 'GF_FOP_XATTROP'");
return 0;
}
@@ -1311,59 +1148,71 @@ int32_t ec_combine_xattrop(ec_fop_data_t * fop, ec_cbk_data_t * dst,
}
int32_t
-ec_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xattr,
- dict_t *xdata)
+ec_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xattr, dict_t *xdata)
{
- ec_fop_data_t *fop = NULL;
- ec_cbk_data_t *cbk = NULL;
- int32_t idx = (int32_t)(uintptr_t)cookie;
- uint64_t version = 0;
- uint64_t *version_xattr = 0;
+ ec_fop_data_t *fop = NULL;
+ ec_lock_link_t *link = NULL;
+ ec_cbk_data_t *cbk = NULL;
+ uint64_t dirty[2] = {0};
+ data_t *data;
+ uint64_t *version;
+ int32_t idx = (int32_t)(uintptr_t)cookie;
- VALIDATE_OR_GOTO (this, out);
- GF_VALIDATE_OR_GOTO (this->name, frame, out);
- GF_VALIDATE_OR_GOTO (this->name, frame->local, out);
- GF_VALIDATE_OR_GOTO (this->name, this->private, out);
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = frame->local;
+ fop = frame->local;
- ec_trace ("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx,
- frame, op_ret, op_errno);
+ ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame,
+ op_ret, op_errno);
- cbk = ec_cbk_data_allocate (frame, this, fop, fop->id, idx, op_ret,
- op_errno);
- if (!cbk)
- goto out;
+ cbk = ec_cbk_data_allocate(frame, this, fop, fop->id, idx, op_ret,
+ op_errno);
+ if (!cbk)
+ goto out;
- if (op_ret >= 0) {
- uint64_t dirty;
- cbk->dict = dict_ref (xattr);
-
- if (dict_get_bin (xattr, EC_XATTR_VERSION,
- (void **)&version_xattr) == 0) {
- version = ntoh64(version_xattr[0]);
- if ((version >> EC_SELFHEAL_BIT) & 1)
- fop->healing |= (1ULL<<idx);
- }
+ if (op_ret >= 0) {
+ cbk->dict = dict_ref(xattr);
+
+ data = dict_get(cbk->dict, EC_XATTR_VERSION);
+ if ((data != NULL) && (data->len >= sizeof(uint64_t))) {
+ version = (uint64_t *)data->data;
+
+ if (((ntoh64(version[0]) >> EC_SELFHEAL_BIT) & 1) != 0) {
+ LOCK(&fop->lock);
- if (ec_dict_del_number (xattr, EC_XATTR_DIRTY, &dirty) == 0)
- cbk->dirty = dirty != 0;
+ fop->healing |= 1ULL << idx;
+
+ UNLOCK(&fop->lock);
+ }
}
- if (xdata)
- cbk->xdata = dict_ref(xdata);
+ ec_dict_del_array(xattr, EC_XATTR_DIRTY, dirty, EC_VERSION_SIZE);
+ link = fop->data;
+ if (link) {
+ /*Keep a note of if the dirty is already set or not*/
+ link->dirty[0] |= (dirty[0] != 0);
+ link->dirty[1] |= (dirty[1] != 0);
+ }
+ }
- ec_combine (cbk, ec_combine_xattrop);
+ if (xdata)
+ cbk->xdata = dict_ref(xdata);
+
+ ec_combine(cbk, ec_combine_xattrop);
out:
- if (fop)
- ec_complete(fop);
+ if (fop)
+ ec_complete(fop);
- return 0;
+ return 0;
}
-void ec_wind_xattrop(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+void
+ec_wind_xattrop(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
{
ec_trace("WIND", fop, "idx=%d", idx);
@@ -1372,21 +1221,20 @@ void ec_wind_xattrop(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
&fop->loc[0], fop->xattrop_flags, fop->dict, fop->xdata);
}
-int32_t ec_manager_xattrop(ec_fop_data_t * fop, int32_t state)
+int32_t
+ec_manager_xattrop(ec_fop_data_t *fop, int32_t state)
{
- ec_cbk_data_t * cbk;
+ ec_cbk_data_t *cbk;
- switch (state)
- {
+ switch (state) {
case EC_STATE_INIT:
case EC_STATE_LOCK:
- if (fop->fd == NULL)
- {
- ec_lock_prepare_inode(fop, &fop->loc[0], 1);
- }
- else
- {
- ec_lock_prepare_fd(fop, fop->fd, 1);
+ if (fop->fd == NULL) {
+ ec_lock_prepare_inode(fop, &fop->loc[0], EC_UPDATE_META, 0,
+ EC_RANGE_FULL);
+ } else {
+ ec_lock_prepare_fd(fop, fop->fd, EC_UPDATE_META, 0,
+ EC_RANGE_FULL);
}
ec_lock(fop);
@@ -1398,27 +1246,12 @@ int32_t ec_manager_xattrop(ec_fop_data_t * fop, int32_t state)
return EC_STATE_PREPARE_ANSWER;
case EC_STATE_PREPARE_ANSWER:
- cbk = fop->answer;
- if (cbk != NULL)
- {
- if (!ec_dict_combine(cbk, EC_COMBINE_XDATA) ||
- ((cbk->op_ret >= 0) && !ec_dict_combine(cbk,
- EC_COMBINE_DICT)))
- {
- if (cbk->op_ret >= 0)
- {
- cbk->op_ret = -1;
- cbk->op_errno = EIO;
- }
- }
- if (cbk->op_ret < 0)
- {
- ec_fop_set_error(fop, cbk->op_errno);
- }
- }
- else
- {
- ec_fop_set_error(fop, EIO);
+ cbk = ec_fop_prepare_answer(fop, _gf_false);
+ if (cbk != NULL) {
+ int32_t err;
+
+ err = ec_dict_combine(cbk, EC_COMBINE_DICT);
+ ec_cbk_set_error(cbk, -err, _gf_false);
}
return EC_STATE_REPORT;
@@ -1428,19 +1261,13 @@ int32_t ec_manager_xattrop(ec_fop_data_t * fop, int32_t state)
GF_ASSERT(cbk != NULL);
- if (fop->id == GF_FOP_XATTROP)
- {
- if (fop->cbks.xattrop != NULL)
- {
- fop->cbks.xattrop(fop->req_frame, fop, fop->xl,
- cbk->op_ret, cbk->op_errno, cbk->dict,
- cbk->xdata);
+ if (fop->id == GF_FOP_XATTROP) {
+ if (fop->cbks.xattrop != NULL) {
+ fop->cbks.xattrop(fop->req_frame, fop, fop->xl, cbk->op_ret,
+ cbk->op_errno, cbk->dict, cbk->xdata);
}
- }
- else
- {
- if (fop->cbks.fxattrop != NULL)
- {
+ } else {
+ if (fop->cbks.fxattrop != NULL) {
fop->cbks.fxattrop(fop->req_frame, fop, fop->xl,
cbk->op_ret, cbk->op_errno, cbk->dict,
cbk->xdata);
@@ -1456,18 +1283,13 @@ int32_t ec_manager_xattrop(ec_fop_data_t * fop, int32_t state)
case -EC_STATE_REPORT:
GF_ASSERT(fop->error != 0);
- if (fop->id == GF_FOP_XATTROP)
- {
- if (fop->cbks.xattrop != NULL)
- {
+ if (fop->id == GF_FOP_XATTROP) {
+ if (fop->cbks.xattrop != NULL) {
fop->cbks.xattrop(fop->req_frame, fop, fop->xl, -1,
fop->error, NULL, NULL);
}
- }
- else
- {
- if (fop->cbks.fxattrop != NULL)
- {
+ } else {
+ if (fop->cbks.fxattrop != NULL) {
fop->cbks.fxattrop(fop->req_frame, fop, fop->xl, -1,
fop->error, NULL, NULL);
}
@@ -1488,66 +1310,61 @@ int32_t ec_manager_xattrop(ec_fop_data_t * fop, int32_t state)
return EC_STATE_END;
default:
- gf_log(fop->xl->name, GF_LOG_ERROR, "Unhandled state %d for %s",
- state, ec_fop_name(fop->id));
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
return EC_STATE_END;
}
}
-void ec_xattrop(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_xattrop_cbk_t func, void * data,
- loc_t * loc, gf_xattrop_flags_t optype, dict_t * xattr,
- dict_t * xdata)
+void
+ec_xattrop(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_xattrop_cbk_t func, void *data, loc_t *loc,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
{
- ec_cbk_t callback = { .xattrop = func };
- ec_fop_data_t * fop = NULL;
- int32_t error = EIO;
+ ec_cbk_t callback = {.xattrop = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
- gf_log("ec", GF_LOG_TRACE, "EC(XATTROP) %p", frame);
+ gf_msg_trace("ec", 0, "EC(XATTROP) %p", frame);
VALIDATE_OR_GOTO(this, out);
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_XATTROP,
- EC_FLAG_UPDATE_LOC_INODE, target, minimum,
- ec_wind_xattrop, ec_manager_xattrop, callback,
- data);
- if (fop == NULL)
- {
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_XATTROP, 0, target,
+ fop_flags, ec_wind_xattrop, ec_manager_xattrop,
+ callback, data);
+ if (fop == NULL) {
goto out;
}
fop->xattrop_flags = optype;
- if (loc != NULL)
- {
- if (loc_copy(&fop->loc[0], loc) != 0)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to copy a location.");
+ if (loc != NULL) {
+ if (loc_copy(&fop->loc[0], loc) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL,
+ "Failed to copy a location.");
goto out;
}
}
- if (xattr != NULL)
- {
+ if (xattr != NULL) {
fop->dict = dict_ref(xattr);
- if (fop->dict == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (fop->dict == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
}
- if (xdata != NULL)
- {
+ if (xdata != NULL) {
fop->xdata = dict_ref(xdata);
- if (fop->xdata == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
@@ -1556,17 +1373,15 @@ void ec_xattrop(call_frame_t * frame, xlator_t * this, uintptr_t target,
error = 0;
out:
- if (fop != NULL)
- {
+ if (fop != NULL) {
ec_manager(fop, error);
- }
- else
- {
- func(frame, NULL, this, -1, EIO, NULL, NULL);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, NULL);
}
}
-void ec_wind_fxattrop(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+void
+ec_wind_fxattrop(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
{
ec_trace("WIND", fop, "idx=%d", idx);
@@ -1575,27 +1390,25 @@ void ec_wind_fxattrop(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
fop->fd, fop->xattrop_flags, fop->dict, fop->xdata);
}
-void ec_fxattrop(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_fxattrop_cbk_t func, void * data,
- fd_t * fd, gf_xattrop_flags_t optype, dict_t * xattr,
- dict_t * xdata)
+void
+ec_fxattrop(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fxattrop_cbk_t func, void *data, fd_t *fd,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
{
- ec_cbk_t callback = { .fxattrop = func };
- ec_fop_data_t * fop = NULL;
- int32_t error = EIO;
+ ec_cbk_t callback = {.fxattrop = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
- gf_log("ec", GF_LOG_TRACE, "EC(FXATTROP) %p", frame);
+ gf_msg_trace("ec", 0, "EC(FXATTROP) %p", frame);
VALIDATE_OR_GOTO(this, out);
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_FXATTROP,
- EC_FLAG_UPDATE_FD_INODE, target, minimum,
- ec_wind_fxattrop, ec_manager_xattrop, callback,
- data);
- if (fop == NULL)
- {
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_FXATTROP, 0, target,
+ fop_flags, ec_wind_fxattrop, ec_manager_xattrop,
+ callback, data);
+ if (fop == NULL) {
goto out;
}
@@ -1603,35 +1416,32 @@ void ec_fxattrop(call_frame_t * frame, xlator_t * this, uintptr_t target,
fop->xattrop_flags = optype;
- if (fd != NULL)
- {
+ if (fd != NULL) {
fop->fd = fd_ref(fd);
- if (fop->fd == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "file descriptor.");
+ if (fop->fd == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL,
+ "Failed to reference a "
+ "file descriptor.");
goto out;
}
}
- if (xattr != NULL)
- {
+ if (xattr != NULL) {
fop->dict = dict_ref(xattr);
- if (fop->dict == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (fop->dict == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
}
- if (xdata != NULL)
- {
+ if (xdata != NULL) {
fop->xdata = dict_ref(xdata);
- if (fop->xdata == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
@@ -1640,12 +1450,142 @@ void ec_fxattrop(call_frame_t * frame, xlator_t * this, uintptr_t target,
error = 0;
out:
- if (fop != NULL)
- {
+ if (fop != NULL) {
ec_manager(fop, error);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, NULL);
}
- else
- {
- func(frame, NULL, this, -1, EIO, NULL, NULL);
+}
+
+/* FOP: IPC */
+
+int32_t
+ec_ipc_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ ec_fop_data_t *fop = NULL;
+ ec_cbk_data_t *cbk = NULL;
+ int32_t idx = (int32_t)(uintptr_t)cookie;
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = frame->local;
+
+ ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame,
+ op_ret, op_errno);
+
+ cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_IPC, idx, op_ret,
+ op_errno);
+
+ if (cbk != NULL) {
+ if (xdata != NULL) {
+ cbk->xdata = dict_ref(xdata);
+ }
+
+ ec_combine(cbk, NULL);
+ }
+
+out:
+ if (fop != NULL) {
+ ec_complete(fop);
+ }
+
+ return 0;
+}
+
+void
+ec_wind_ipc(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_ipc_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->ipc, fop->int32,
+ fop->xdata);
+}
+
+int32_t
+ec_manager_ipc(ec_fop_data_t *fop, int32_t state)
+{
+ ec_cbk_data_t *cbk;
+
+ switch (state) {
+ case EC_STATE_INIT:
+ case EC_STATE_DISPATCH:
+ ec_dispatch_all(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ ec_fop_prepare_answer(fop, _gf_true);
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+
+ GF_ASSERT(cbk != NULL);
+ if (fop->cbks.ipc != NULL) {
+ fop->cbks.ipc(fop->req_frame, fop, fop->xl, cbk->op_ret,
+ cbk->op_errno, cbk->xdata);
+ }
+
+ return EC_STATE_END;
+
+ case -EC_STATE_INIT:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_REPORT:
+ GF_ASSERT(fop->error != 0);
+
+ if (fop->cbks.ipc != NULL) {
+ fop->cbks.ipc(fop->req_frame, fop, fop->xl, -1, fop->error,
+ NULL);
+ }
+
+ return EC_STATE_END;
+
+ default:
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+void
+ec_ipc(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_ipc_cbk_t func, void *data, int32_t op,
+ dict_t *xdata)
+{
+ ec_cbk_t callback = {.ipc = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
+
+ gf_msg_trace("ec", 0, "EC(IPC) %p", frame);
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_IPC, 0, target, fop_flags,
+ ec_wind_ipc, ec_manager_ipc, callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+ if (xdata != NULL) {
+ fop->xdata = dict_ref(xdata);
+ }
+ fop->int32 = op;
+
+ error = 0;
+
+out:
+ if (fop != NULL) {
+ ec_manager(fop, error);
+ } else {
+ func(frame, NULL, this, -1, error, NULL);
}
}
diff --git a/xlators/cluster/ec/src/ec-gf.c b/xlators/cluster/ec/src/ec-gf.c
deleted file mode 100644
index 1ae8928f20b..00000000000
--- a/xlators/cluster/ec/src/ec-gf.c
+++ /dev/null
@@ -1,11635 +0,0 @@
-/*
- Copyright (c) 2012-2014 DataLab, s.l. <http://www.datalab.es>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#include <inttypes.h>
-#include <string.h>
-
-#include "ec-gf.h"
-
-static void gf8_muladd_00(uint8_t * out, uint8_t * in, unsigned int width)
-{
- memcpy(out, in, sizeof(uint64_t) * 8 * width);
-}
-
-static void gf8_muladd_01(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- out_ptr[0] ^= in_ptr[0];
- out_ptr[width] ^= in_ptr[width];
- out_ptr[width * 2] ^= in_ptr[width * 2];
- out_ptr[width * 3] ^= in_ptr[width * 3];
- out_ptr[width * 4] ^= in_ptr[width * 4];
- out_ptr[width * 5] ^= in_ptr[width * 5];
- out_ptr[width * 6] ^= in_ptr[width * 6];
- out_ptr[width * 7] ^= in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_02(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out0 = in7;
- out1 = in0;
- out7 = in6;
- out5 = in4;
- out6 = in5;
- out3 = in2 ^ in7;
- out4 = in3 ^ in7;
- out2 = in1 ^ in7;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_03(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out0 = in0 ^ in7;
- tmp0 = in2 ^ in7;
- out1 = in0 ^ in1;
- out7 = in6 ^ in7;
- out5 = in4 ^ in5;
- out6 = in5 ^ in6;
- out4 = in3 ^ in4 ^ in7;
- out2 = tmp0 ^ in1;
- out3 = tmp0 ^ in3;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_04(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out0 = in6;
- out1 = in7;
- out7 = in5;
- out6 = in4;
- tmp0 = in6 ^ in7;
- out2 = in0 ^ in6;
- out5 = in3 ^ in7;
- out3 = tmp0 ^ in1;
- out4 = tmp0 ^ in2;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_05(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out0 = in0 ^ in6;
- out1 = in1 ^ in7;
- out7 = in5 ^ in7;
- out6 = in4 ^ in6;
- out2 = out0 ^ in2;
- out3 = out1 ^ in3 ^ in6;
- out5 = out7 ^ in3;
- out4 = out6 ^ in2 ^ in7;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_06(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out0 = in6 ^ in7;
- tmp0 = in1 ^ in6;
- out1 = in0 ^ in7;
- out7 = in5 ^ in6;
- out6 = in4 ^ in5;
- out4 = in2 ^ in3 ^ in6;
- out5 = in3 ^ in4 ^ in7;
- out3 = tmp0 ^ in2;
- out2 = tmp0 ^ out1;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_07(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in2 ^ in6;
- tmp1 = in5 ^ in6;
- tmp2 = in0 ^ in7;
- tmp3 = tmp0 ^ in3;
- out6 = tmp1 ^ in4;
- out7 = tmp1 ^ in7;
- out0 = tmp2 ^ in6;
- out1 = tmp2 ^ in1;
- out3 = tmp3 ^ in1;
- out4 = tmp3 ^ in4;
- out5 = out4 ^ out7 ^ in2;
- out2 = tmp0 ^ out1;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_08(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out0 = in5;
- out1 = in6;
- out7 = in4;
- out6 = in3 ^ in7;
- out3 = in0 ^ in5 ^ in6;
- out5 = in2 ^ in6 ^ in7;
- out2 = in5 ^ in7;
- out4 = out2 ^ in1 ^ in6;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_09(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out0 = in0 ^ in5;
- tmp0 = in3 ^ in6;
- out1 = in1 ^ in6;
- out7 = in4 ^ in7;
- out2 = in2 ^ in5 ^ in7;
- out3 = tmp0 ^ out0;
- out6 = tmp0 ^ in7;
- out4 = out1 ^ out7 ^ in5;
- out5 = out2 ^ in6;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_0A(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out0 = in5 ^ in7;
- out1 = in0 ^ in6;
- out7 = in4 ^ in6;
- out2 = in1 ^ in5;
- out6 = out0 ^ in3;
- out3 = out0 ^ out1 ^ in2;
- out5 = out7 ^ in2 ^ in7;
- out4 = out2 ^ in3 ^ in6;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_0B(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in2 ^ in5;
- tmp1 = in0 ^ in6;
- tmp2 = in4 ^ in7;
- out0 = in0 ^ in5 ^ in7;
- out2 = tmp0 ^ in1;
- out1 = tmp1 ^ in1;
- out6 = tmp1 ^ out0 ^ in3;
- out7 = tmp2 ^ in6;
- out4 = tmp2 ^ out6 ^ in1;
- out3 = out6 ^ in0 ^ in2;
- out5 = tmp0 ^ out7;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_0C(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out0 = in5 ^ in6;
- out1 = in6 ^ in7;
- out7 = in4 ^ in5;
- tmp0 = in1 ^ in5;
- tmp1 = in0 ^ in7;
- out5 = in2 ^ in3 ^ in6;
- out6 = in3 ^ in4 ^ in7;
- out2 = tmp1 ^ out0;
- out4 = tmp0 ^ in2;
- out3 = tmp0 ^ tmp1;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_0D(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in4 ^ in5;
- tmp1 = in5 ^ in6;
- out1 = in1 ^ in6 ^ in7;
- out7 = tmp0 ^ in7;
- out4 = tmp0 ^ in1 ^ in2;
- out0 = tmp1 ^ in0;
- tmp2 = tmp1 ^ in3;
- out6 = tmp2 ^ out7;
- out2 = out0 ^ in2 ^ in7;
- out3 = out0 ^ out1 ^ in3;
- out5 = tmp2 ^ in2;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_0E(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in0 ^ in1;
- tmp1 = in2 ^ in5;
- tmp2 = in5 ^ in6;
- out1 = in0 ^ in6 ^ in7;
- out3 = tmp0 ^ tmp1;
- out2 = tmp0 ^ tmp2;
- tmp3 = tmp1 ^ in3;
- out7 = tmp2 ^ in4;
- out0 = tmp2 ^ in7;
- out4 = tmp3 ^ in1 ^ in7;
- out5 = tmp3 ^ out7;
- out6 = out0 ^ out5 ^ in2;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_0F(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in6 ^ in7;
- tmp1 = tmp0 ^ in1;
- tmp2 = tmp0 ^ in5;
- out1 = tmp1 ^ in0;
- out7 = tmp2 ^ in4;
- out0 = tmp2 ^ in0;
- out6 = out7 ^ in3;
- out5 = out6 ^ in2 ^ in7;
- tmp3 = tmp1 ^ out0 ^ in2;
- out4 = tmp1 ^ out5;
- out2 = tmp3 ^ in6;
- out3 = tmp3 ^ in3;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_10(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out0 = in4;
- out1 = in5;
- out7 = in3 ^ in7;
- tmp0 = in6 ^ in7;
- out2 = in4 ^ in6;
- tmp1 = out2 ^ in5;
- out6 = tmp0 ^ in2;
- out3 = tmp0 ^ tmp1;
- out5 = out2 ^ out3 ^ in1;
- out4 = tmp1 ^ in0;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_11(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out7 = in3;
- out0 = in0 ^ in4;
- out1 = in1 ^ in5;
- out6 = in2 ^ in7;
- out4 = in0 ^ in5 ^ in6;
- out5 = in1 ^ in6 ^ in7;
- out2 = in2 ^ in4 ^ in6;
- out3 = in3 ^ in4 ^ in5 ^ in7;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_12(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out0 = in4 ^ in7;
- out1 = in0 ^ in5;
- out3 = in2 ^ in4 ^ in5;
- tmp0 = out0 ^ in6;
- out2 = tmp0 ^ in1;
- tmp1 = tmp0 ^ in3;
- out6 = tmp0 ^ out3;
- out5 = out2 ^ in5;
- out7 = tmp1 ^ in4;
- out4 = tmp1 ^ out1;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_13(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out7 = in3 ^ in6;
- tmp0 = in0 ^ in5;
- tmp1 = in4 ^ in7;
- out6 = in2 ^ in5 ^ in7;
- out4 = tmp0 ^ out7 ^ in7;
- out1 = tmp0 ^ in1;
- out0 = tmp1 ^ in0;
- out5 = tmp1 ^ in1 ^ in6;
- out3 = tmp1 ^ out6 ^ in3;
- out2 = out5 ^ in2;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_14(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out0 = in4 ^ in6;
- out1 = in5 ^ in7;
- out2 = in0 ^ in4;
- tmp0 = out0 ^ in5;
- out7 = out1 ^ in3;
- tmp1 = out1 ^ in2;
- out3 = tmp0 ^ in1;
- out6 = tmp0 ^ tmp1;
- out4 = tmp1 ^ out2;
- out5 = out3 ^ in3 ^ in4;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_15(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out7 = in3 ^ in5;
- tmp0 = in0 ^ in4;
- out1 = in1 ^ in5 ^ in7;
- out5 = in1 ^ in3 ^ in6;
- out0 = tmp0 ^ in6;
- out2 = tmp0 ^ in2;
- out3 = out5 ^ in4 ^ in5;
- out6 = out2 ^ in0 ^ in7;
- out4 = tmp0 ^ out6 ^ in5;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_16(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in0 ^ in5;
- tmp1 = in4 ^ in7;
- tmp2 = in2 ^ in3 ^ in4;
- out1 = tmp0 ^ in7;
- out4 = tmp0 ^ tmp2;
- out0 = tmp1 ^ in6;
- tmp3 = tmp1 ^ in1;
- out6 = out0 ^ in2 ^ in5;
- out2 = tmp3 ^ in0;
- out3 = out6 ^ in1;
- out7 = tmp2 ^ out6;
- out5 = tmp3 ^ out7;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_17(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in2 ^ in5;
- tmp1 = in3 ^ in6;
- tmp2 = tmp0 ^ in4;
- out4 = tmp0 ^ in0 ^ in3;
- out7 = tmp1 ^ in5;
- tmp3 = tmp1 ^ in1;
- out6 = tmp2 ^ in7;
- out5 = tmp3 ^ in4;
- out3 = tmp3 ^ out6;
- out0 = out3 ^ out4 ^ in1;
- out2 = out3 ^ out7 ^ in0;
- out1 = tmp2 ^ out2;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_18(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out0 = in4 ^ in5;
- out1 = in5 ^ in6;
- tmp0 = in4 ^ in7;
- out5 = in1 ^ in2 ^ in5;
- out6 = in2 ^ in3 ^ in6;
- out2 = tmp0 ^ out1;
- out7 = tmp0 ^ in3;
- tmp1 = tmp0 ^ in0;
- out3 = tmp1 ^ in6;
- out4 = tmp1 ^ in1;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_19(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out5 = in1 ^ in2;
- out7 = in3 ^ in4;
- tmp0 = in0 ^ in7;
- out6 = in2 ^ in3;
- out1 = in1 ^ in5 ^ in6;
- out0 = in0 ^ in4 ^ in5;
- out4 = tmp0 ^ in1;
- tmp1 = tmp0 ^ in6;
- out2 = tmp1 ^ out0 ^ in2;
- out3 = tmp1 ^ out7;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_1A(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in4 ^ in5;
- tmp1 = in5 ^ in6;
- tmp2 = tmp0 ^ in1;
- out0 = tmp0 ^ in7;
- out1 = tmp1 ^ in0;
- tmp3 = tmp1 ^ in3;
- out5 = tmp2 ^ in2;
- out2 = tmp2 ^ in6;
- out7 = tmp3 ^ out0;
- out6 = tmp3 ^ in2;
- out4 = tmp3 ^ out2 ^ in0;
- out3 = tmp0 ^ out1 ^ in2;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_1B(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in2 ^ in4;
- tmp1 = in2 ^ in5;
- tmp2 = in3 ^ in6;
- out5 = tmp0 ^ in1;
- tmp3 = tmp0 ^ in0;
- out6 = tmp1 ^ in3;
- out0 = tmp1 ^ tmp3 ^ in7;
- out7 = tmp2 ^ in4;
- tmp4 = out5 ^ in6;
- out3 = tmp2 ^ tmp3;
- out2 = tmp4 ^ in5;
- out4 = tmp4 ^ out3;
- out1 = tmp3 ^ out2;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_1C(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in2 ^ in3;
- tmp1 = in4 ^ in6;
- tmp2 = in5 ^ in7;
- out6 = tmp0 ^ tmp1;
- out0 = tmp1 ^ in5;
- out1 = tmp2 ^ in6;
- tmp3 = tmp2 ^ in1;
- tmp4 = tmp2 ^ in4;
- out2 = tmp4 ^ in0;
- out7 = tmp4 ^ in3;
- out5 = tmp0 ^ tmp3;
- out3 = tmp3 ^ out2;
- out4 = out3 ^ in2 ^ in6;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_1D(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in1 ^ in3;
- tmp1 = in0 ^ in4;
- tmp2 = in3 ^ in4;
- tmp3 = in2 ^ in7;
- out3 = tmp0 ^ tmp1;
- out5 = tmp0 ^ tmp3;
- tmp4 = tmp1 ^ in5;
- out6 = tmp2 ^ in2;
- out7 = tmp2 ^ in5;
- out2 = tmp3 ^ tmp4;
- out4 = out3 ^ out6 ^ in6;
- out0 = tmp4 ^ in6;
- out1 = out2 ^ out4 ^ in4;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_1E(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in0 ^ in4;
- tmp1 = in2 ^ in7;
- tmp2 = tmp0 ^ in1;
- out3 = tmp1 ^ tmp2;
- out2 = tmp2 ^ in5;
- out4 = out3 ^ in3 ^ in6;
- tmp3 = out4 ^ in7;
- out6 = tmp3 ^ out2 ^ in4;
- out7 = tmp1 ^ out6;
- out0 = out7 ^ in3;
- out1 = tmp0 ^ out0;
- out5 = tmp3 ^ out1;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_1F(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in4 ^ in6;
- tmp1 = tmp0 ^ in5;
- out7 = tmp1 ^ in3;
- out0 = tmp1 ^ in0 ^ in7;
- out6 = out7 ^ in2 ^ in6;
- out1 = out0 ^ in1 ^ in4;
- out4 = out0 ^ out6 ^ in1;
- out3 = tmp0 ^ out4;
- out2 = out4 ^ out7 ^ in7;
- out5 = out3 ^ in0;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_20(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out1 = in4;
- out0 = in3 ^ in7;
- tmp0 = in3 ^ in4;
- tmp1 = in6 ^ in7;
- out2 = out0 ^ in5;
- out4 = tmp0 ^ in5;
- out3 = tmp0 ^ tmp1;
- out7 = tmp1 ^ in2;
- out6 = tmp1 ^ in1 ^ in5;
- out5 = out2 ^ out3 ^ in0;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_21(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out1 = in1 ^ in4;
- tmp0 = in4 ^ in6;
- out4 = in3 ^ in5;
- out7 = in2 ^ in6;
- out0 = in0 ^ in3 ^ in7;
- out6 = in1 ^ in5 ^ in7;
- out3 = tmp0 ^ in7;
- out5 = tmp0 ^ in0;
- out2 = out4 ^ in2 ^ in7;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_22(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out0 = in3;
- out1 = in0 ^ in4;
- out7 = in2 ^ in7;
- out4 = in4 ^ in5 ^ in7;
- out5 = in0 ^ in5 ^ in6;
- out6 = in1 ^ in6 ^ in7;
- out3 = in2 ^ in3 ^ in4 ^ in6;
- out2 = in1 ^ in3 ^ in5;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_23(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out7 = in2;
- out0 = in0 ^ in3;
- out4 = in5 ^ in7;
- out5 = in0 ^ in6;
- out6 = in1 ^ in7;
- out3 = in2 ^ in4 ^ in6;
- out1 = in0 ^ in1 ^ in4;
- out2 = out4 ^ out6 ^ in2 ^ in3;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_24(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out1 = in4 ^ in7;
- tmp0 = in3 ^ in4;
- out0 = in3 ^ in6 ^ in7;
- out3 = tmp0 ^ in1;
- tmp1 = out0 ^ in5;
- out6 = tmp1 ^ out3;
- out2 = tmp1 ^ in0;
- out7 = tmp1 ^ in2 ^ in3;
- out5 = out2 ^ in4;
- out4 = tmp0 ^ out7;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_25(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out3 = in1 ^ in4;
- tmp0 = in2 ^ in5;
- out1 = out3 ^ in7;
- out7 = tmp0 ^ in6;
- out6 = out1 ^ in5;
- out4 = out7 ^ in3 ^ in7;
- out2 = out4 ^ in0;
- out0 = tmp0 ^ out2;
- out5 = out0 ^ in4;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_26(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out0 = in3 ^ in6;
- tmp0 = in4 ^ in7;
- out7 = in2 ^ in5 ^ in7;
- tmp1 = out0 ^ in0 ^ in5;
- out1 = tmp0 ^ in0;
- tmp2 = tmp0 ^ in6;
- out2 = tmp1 ^ in1;
- out5 = tmp1 ^ in7;
- out6 = tmp2 ^ in1;
- out4 = tmp2 ^ out7;
- out3 = out0 ^ out6 ^ in2;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_27(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out7 = in2 ^ in5;
- out0 = in0 ^ in3 ^ in6;
- out6 = in1 ^ in4 ^ in7;
- out4 = out7 ^ in6;
- out2 = out0 ^ out7 ^ in1;
- out5 = out0 ^ in7;
- out1 = out6 ^ in0;
- out3 = out6 ^ in2;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_28(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out2 = in3;
- out1 = in4 ^ in6;
- out0 = in3 ^ in5 ^ in7;
- tmp0 = out1 ^ in7;
- tmp1 = out0 ^ in4;
- out7 = tmp0 ^ in2;
- tmp2 = tmp0 ^ in1;
- out3 = tmp1 ^ in0;
- out6 = tmp1 ^ tmp2;
- out4 = tmp2 ^ in3;
- out5 = out3 ^ in2 ^ in3;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_29(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out2 = in2 ^ in3;
- tmp0 = in1 ^ in3;
- tmp1 = in4 ^ in6;
- tmp2 = in0 ^ in4 ^ in7;
- out6 = tmp0 ^ in5;
- out4 = tmp0 ^ in6 ^ in7;
- out1 = tmp1 ^ in1;
- out7 = tmp1 ^ in2;
- out3 = tmp2 ^ in5;
- out5 = tmp2 ^ in2;
- out0 = out3 ^ in3 ^ in4;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_2A(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out0 = in3 ^ in5;
- tmp0 = in1 ^ in3;
- tmp1 = in0 ^ in4;
- out7 = in2 ^ in4 ^ in7;
- out3 = tmp1 ^ out0 ^ in2;
- out2 = tmp0 ^ in7;
- out6 = tmp0 ^ in6;
- out1 = tmp1 ^ in6;
- out5 = tmp1 ^ out7 ^ in5;
- out4 = out1 ^ in0 ^ in1;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_2B(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out4 = in1 ^ in6;
- out7 = in2 ^ in4;
- tmp0 = in0 ^ in5;
- tmp1 = in2 ^ in7;
- out6 = in1 ^ in3;
- out1 = out4 ^ in0 ^ in4;
- out3 = tmp0 ^ out7;
- out0 = tmp0 ^ in3;
- out5 = tmp1 ^ in0;
- out2 = tmp1 ^ out6;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_2C(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in2 ^ in5;
- tmp1 = in2 ^ in3 ^ in4;
- tmp2 = tmp0 ^ in6;
- out4 = tmp1 ^ in1;
- out5 = tmp1 ^ in0 ^ in5;
- tmp3 = tmp2 ^ in4;
- out6 = tmp2 ^ out4;
- out7 = tmp3 ^ in7;
- out2 = tmp3 ^ out5;
- out3 = out6 ^ in0;
- out0 = tmp1 ^ out7;
- out1 = tmp0 ^ out7;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_2D(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in2 ^ in3;
- out4 = tmp0 ^ in1;
- tmp1 = tmp0 ^ in0;
- out2 = tmp1 ^ in6;
- out5 = tmp1 ^ in4;
- tmp2 = out2 ^ in2;
- tmp3 = tmp2 ^ in5;
- out0 = tmp3 ^ in7;
- out7 = tmp3 ^ out5;
- out6 = out4 ^ out7 ^ in6;
- out3 = tmp2 ^ out6;
- out1 = out0 ^ out6 ^ in0;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_2E(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in4 ^ in7;
- out0 = in3 ^ in5 ^ in6;
- tmp1 = tmp0 ^ in0;
- tmp2 = tmp0 ^ in2;
- out1 = tmp1 ^ in6;
- out4 = tmp2 ^ in1;
- out7 = tmp2 ^ in5;
- out3 = out0 ^ out4 ^ in0;
- out2 = out3 ^ out7 ^ in7;
- out6 = tmp1 ^ out2;
- out5 = tmp1 ^ out7 ^ in3;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_2F(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in0 ^ in3;
- tmp1 = in2 ^ in5;
- out4 = in1 ^ in2 ^ in7;
- out6 = in1 ^ in3 ^ in4;
- out5 = tmp0 ^ in2;
- tmp2 = tmp0 ^ in6;
- out7 = tmp1 ^ in4;
- out0 = tmp2 ^ in5;
- out2 = tmp2 ^ out4;
- out1 = tmp2 ^ out6 ^ in7;
- out3 = tmp1 ^ out1;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_30(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out1 = in4 ^ in5;
- tmp0 = in3 ^ in6;
- tmp1 = in4 ^ in7;
- out6 = in1 ^ in2 ^ in5;
- out3 = tmp0 ^ in5;
- out4 = tmp0 ^ in0;
- out7 = tmp0 ^ in2;
- out0 = tmp1 ^ in3;
- out2 = tmp1 ^ out3;
- out5 = tmp1 ^ in0 ^ in1;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_31(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out3 = in5 ^ in6;
- tmp0 = in4 ^ in5;
- tmp1 = in0 ^ in3 ^ in4;
- tmp2 = out3 ^ in2;
- out1 = tmp0 ^ in1;
- out0 = tmp1 ^ in7;
- out4 = tmp1 ^ in6;
- out6 = tmp2 ^ in1;
- out2 = tmp2 ^ out0 ^ in0;
- out5 = out1 ^ in0 ^ in7;
- out7 = tmp0 ^ out2;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_32(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out0 = in3 ^ in4;
- out7 = in2 ^ in3;
- tmp0 = in5 ^ in6;
- tmp1 = in0 ^ in7;
- out6 = in1 ^ in2;
- out1 = in0 ^ in4 ^ in5;
- out2 = tmp0 ^ out0 ^ in1;
- out3 = tmp0 ^ out7 ^ in7;
- out4 = tmp1 ^ in6;
- out5 = tmp1 ^ in1;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_33(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in2 ^ in3;
- tmp1 = in0 ^ in4;
- tmp2 = in1 ^ in5;
- out6 = in1 ^ in2 ^ in6;
- out7 = tmp0 ^ in7;
- out0 = tmp1 ^ in3;
- out1 = tmp1 ^ tmp2;
- tmp3 = tmp2 ^ in7;
- tmp4 = tmp2 ^ in4 ^ in6;
- out5 = tmp3 ^ in0;
- out3 = tmp3 ^ out6;
- out4 = tmp4 ^ out5;
- out2 = tmp0 ^ tmp4;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_34(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in3 ^ in4;
- tmp1 = in4 ^ in5;
- tmp2 = tmp0 ^ in1;
- tmp3 = tmp0 ^ in6;
- out1 = tmp1 ^ in7;
- tmp4 = tmp1 ^ in2;
- out5 = tmp2 ^ in0;
- out3 = tmp2 ^ out1;
- out0 = tmp3 ^ in7;
- out7 = tmp3 ^ tmp4;
- out6 = tmp4 ^ in1;
- out2 = out3 ^ out5 ^ in3;
- out4 = tmp4 ^ out2;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_35(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in2 ^ in6;
- tmp1 = in5 ^ in7;
- out7 = tmp0 ^ tmp1 ^ in3;
- out3 = tmp1 ^ in1;
- out1 = out3 ^ in4;
- tmp2 = out1 ^ in7;
- out5 = tmp2 ^ in0 ^ in3;
- out6 = tmp0 ^ tmp2;
- out0 = out3 ^ out5 ^ in6;
- out4 = tmp0 ^ out0;
- out2 = out4 ^ in5;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_36(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out4 = in0 ^ in2;
- tmp0 = in1 ^ in3;
- out0 = in3 ^ in4 ^ in6;
- out6 = in1 ^ in2 ^ in4;
- out5 = tmp0 ^ in0;
- tmp1 = out5 ^ in5;
- out2 = tmp1 ^ in4;
- out3 = tmp1 ^ out4;
- out1 = tmp0 ^ out2 ^ in7;
- out7 = out3 ^ in1;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_37(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in1 ^ in2;
- tmp1 = in2 ^ in4;
- tmp2 = tmp0 ^ in6;
- out3 = tmp0 ^ in5;
- out4 = tmp1 ^ in0;
- out6 = tmp2 ^ in4;
- out1 = out3 ^ out4 ^ in7;
- tmp3 = out4 ^ in1 ^ in3;
- out7 = tmp3 ^ out1;
- out2 = tmp3 ^ in5;
- out5 = tmp1 ^ out2;
- out0 = tmp2 ^ tmp3;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_38(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out3 = in0 ^ in3;
- tmp0 = in3 ^ in4;
- tmp1 = in5 ^ in7;
- tmp2 = out3 ^ in1;
- out2 = tmp0 ^ in6;
- out0 = tmp0 ^ tmp1;
- out4 = tmp1 ^ tmp2;
- out7 = out2 ^ in2;
- out1 = out2 ^ in3 ^ in5;
- out6 = out4 ^ in0 ^ in2;
- out5 = tmp2 ^ out7;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_39(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out3 = in0;
- tmp0 = in1 ^ in5;
- tmp1 = tmp0 ^ in4;
- out1 = tmp1 ^ in6;
- out5 = out1 ^ in0 ^ in2;
- tmp2 = tmp0 ^ out5;
- out2 = tmp2 ^ in0 ^ in3;
- out7 = out2 ^ in7;
- out6 = tmp1 ^ out7;
- out4 = tmp2 ^ out6;
- out0 = out4 ^ in1;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_3A(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in0 ^ in1;
- tmp1 = in0 ^ in2;
- tmp2 = in3 ^ in4;
- tmp3 = in1 ^ in6;
- tmp4 = in3 ^ in7;
- out4 = tmp0 ^ in5;
- out5 = tmp1 ^ tmp3;
- out3 = tmp1 ^ tmp4;
- out0 = tmp2 ^ in5;
- out7 = tmp2 ^ in2;
- tmp5 = tmp3 ^ in4;
- out2 = tmp4 ^ tmp5;
- out1 = tmp5 ^ out4;
- out6 = tmp0 ^ out3;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_3B(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in1 ^ in6;
- tmp1 = in2 ^ in7;
- tmp2 = tmp0 ^ in3;
- out3 = tmp1 ^ in0;
- out6 = tmp1 ^ tmp2;
- out2 = out6 ^ in4;
- out7 = tmp0 ^ out2;
- out0 = out3 ^ out7 ^ in5;
- out5 = out0 ^ out2 ^ in7;
- out1 = tmp2 ^ out0;
- out4 = out1 ^ in6;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_3C(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in0 ^ in3;
- tmp1 = in2 ^ in7;
- tmp2 = in1 ^ in6 ^ in7;
- out2 = tmp0 ^ in4;
- out3 = tmp0 ^ tmp2;
- out4 = tmp1 ^ out3 ^ in5;
- out5 = tmp2 ^ out2 ^ in2;
- out1 = out4 ^ out5 ^ in6;
- out0 = out1 ^ in3;
- out7 = tmp1 ^ out0;
- out6 = tmp2 ^ out7;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_3D(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in0 ^ in2;
- tmp1 = tmp0 ^ in3;
- out2 = tmp1 ^ in4;
- tmp2 = out2 ^ in5;
- out4 = tmp2 ^ in1 ^ in6;
- out5 = out4 ^ in7;
- out6 = out5 ^ in0;
- out7 = out6 ^ in1;
- out0 = tmp0 ^ out7;
- out1 = tmp1 ^ out5;
- out3 = tmp2 ^ out6;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_3E(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in3 ^ in5;
- tmp1 = tmp0 ^ in4;
- out0 = tmp1 ^ in6;
- out7 = tmp1 ^ in2;
- out6 = out7 ^ in1 ^ in5 ^ in7;
- out2 = out6 ^ in0 ^ in2;
- out4 = out0 ^ out6 ^ in0;
- out5 = tmp0 ^ out4;
- out3 = out5 ^ in7;
- out1 = out3 ^ out6 ^ in5;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_3F(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in0 ^ in1;
- out3 = tmp0 ^ in2 ^ in6;
- tmp1 = out3 ^ in5 ^ in7;
- out4 = tmp1 ^ in4;
- out5 = tmp1 ^ in3;
- out1 = out4 ^ in2;
- out7 = out1 ^ out3 ^ in3;
- out2 = tmp0 ^ out7 ^ in5;
- tmp2 = out2 ^ in0;
- out6 = tmp2 ^ in6;
- out0 = tmp1 ^ tmp2;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_40(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out1 = in3 ^ in7;
- tmp0 = in3 ^ in4;
- tmp1 = in6 ^ in7;
- out4 = tmp0 ^ in2;
- out5 = tmp0 ^ in5;
- out0 = tmp1 ^ in2;
- out7 = tmp1 ^ in1 ^ in5;
- out2 = out0 ^ in4;
- out3 = out2 ^ out5 ^ in7;
- out6 = out3 ^ out4 ^ in0;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_41(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out4 = in2 ^ in3;
- tmp0 = in5 ^ in6;
- tmp1 = in6 ^ in7;
- out5 = in3 ^ in4;
- out1 = in1 ^ in3 ^ in7;
- out6 = in0 ^ in4 ^ in5;
- out3 = tmp0 ^ in2;
- out7 = tmp0 ^ in1;
- out2 = tmp1 ^ in4;
- out0 = tmp1 ^ in0 ^ in2;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_42(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out0 = in2 ^ in6;
- out5 = in3 ^ in5;
- out1 = in0 ^ in3 ^ in7;
- out7 = in1 ^ in5 ^ in7;
- out4 = in2 ^ in4 ^ in7;
- out6 = in0 ^ in4 ^ in6;
- out2 = out0 ^ in1 ^ in4;
- out3 = out5 ^ in6 ^ in7;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_43(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out5 = in3;
- out7 = in1 ^ in5;
- out4 = in2 ^ in7;
- out6 = in0 ^ in4;
- out0 = in0 ^ in2 ^ in6;
- out3 = in5 ^ in6 ^ in7;
- out2 = in1 ^ in4 ^ in6;
- out1 = in0 ^ in1 ^ in3 ^ in7;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_44(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out1 = in3;
- out0 = in2 ^ in7;
- tmp0 = in4 ^ in7;
- out7 = in1 ^ in6 ^ in7;
- out6 = in0 ^ in5 ^ in6;
- out4 = tmp0 ^ in3 ^ in6;
- out3 = out0 ^ in1 ^ in3 ^ in5;
- out2 = out0 ^ in0 ^ in4;
- out5 = tmp0 ^ in5;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_45(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out1 = in1 ^ in3;
- out7 = in1 ^ in6;
- out5 = in4 ^ in7;
- out6 = in0 ^ in5;
- out0 = in0 ^ in2 ^ in7;
- out4 = in3 ^ in6 ^ in7;
- out2 = out5 ^ in0;
- out3 = out0 ^ out6 ^ in1;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_46(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out0 = in2;
- out1 = in0 ^ in3;
- out7 = in1 ^ in7;
- out4 = in4 ^ in6;
- out5 = in5 ^ in7;
- out6 = in0 ^ in6;
- out3 = in1 ^ in3 ^ in5;
- out2 = out4 ^ out6 ^ in1 ^ in2;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_47(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out4 = in6;
- out7 = in1;
- out5 = in7;
- out6 = in0;
- tmp0 = in0 ^ in1;
- out3 = in1 ^ in5;
- out0 = in0 ^ in2;
- out1 = tmp0 ^ in3;
- out2 = tmp0 ^ in4;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_48(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in2 ^ in3;
- out1 = in3 ^ in6 ^ in7;
- out3 = tmp0 ^ in0;
- out0 = tmp0 ^ out1 ^ in5;
- tmp1 = out0 ^ in4;
- out2 = tmp1 ^ in7;
- out5 = tmp1 ^ in3;
- out4 = out5 ^ in1;
- out7 = tmp0 ^ out4;
- out6 = tmp1 ^ out3;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_49(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out3 = in0 ^ in2;
- tmp0 = in2 ^ in5;
- out2 = in4 ^ in5 ^ in6;
- tmp1 = tmp0 ^ out2 ^ in3;
- out7 = out2 ^ in1;
- out5 = tmp1 ^ in7;
- out4 = out5 ^ out7 ^ in6;
- out1 = tmp0 ^ out4;
- out6 = out1 ^ out7 ^ in0;
- out0 = tmp1 ^ out6;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_4A(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in2 ^ in6;
- tmp1 = in3 ^ in7;
- out0 = tmp0 ^ in5;
- out3 = tmp1 ^ in0;
- out5 = tmp1 ^ out0;
- out4 = out0 ^ in1 ^ in4;
- out1 = out3 ^ in6;
- out2 = out4 ^ in7;
- out6 = out1 ^ in4;
- out7 = tmp0 ^ out2;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_4B(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out3 = in0 ^ in7;
- tmp0 = in1 ^ in5;
- tmp1 = in2 ^ in6;
- tmp2 = out3 ^ in3;
- out7 = tmp0 ^ in4;
- out4 = tmp0 ^ tmp1;
- tmp3 = tmp1 ^ in0;
- out6 = tmp2 ^ in4;
- out5 = tmp2 ^ tmp3;
- out1 = tmp2 ^ in1 ^ in6;
- out2 = out7 ^ in6 ^ in7;
- out0 = tmp3 ^ in5;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_4C(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out1 = in3 ^ in6;
- tmp0 = in2 ^ in5;
- tmp1 = out1 ^ in5 ^ in7;
- out0 = tmp0 ^ in7;
- tmp2 = tmp0 ^ in4;
- out6 = tmp1 ^ in0;
- out2 = tmp2 ^ in0;
- out5 = tmp2 ^ in6;
- out3 = tmp0 ^ out6 ^ in1;
- out7 = out0 ^ out5 ^ in1;
- out4 = tmp1 ^ out7;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_4D(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in0 ^ in5;
- tmp1 = in1 ^ in6;
- out4 = in1 ^ in3 ^ in5;
- tmp2 = tmp0 ^ in7;
- out2 = tmp0 ^ in4;
- out1 = tmp1 ^ in3;
- out7 = tmp1 ^ in4;
- out0 = tmp2 ^ in2;
- out6 = tmp2 ^ in3;
- out5 = out7 ^ in1 ^ in2;
- out3 = tmp1 ^ out0 ^ in5;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_4E(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out0 = in2 ^ in5;
- out7 = in1 ^ in4 ^ in7;
- out1 = in0 ^ in3 ^ in6;
- out5 = out0 ^ in6;
- out4 = out7 ^ in5;
- out3 = out1 ^ in1;
- out6 = out1 ^ in7;
- out2 = out4 ^ in0 ^ in2;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_4F(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out5 = in2 ^ in6;
- out7 = in1 ^ in4;
- out3 = in0 ^ in1 ^ in6;
- out4 = in1 ^ in5 ^ in7;
- out0 = in0 ^ in2 ^ in5;
- out6 = in0 ^ in3 ^ in7;
- out1 = out3 ^ in3;
- out2 = out4 ^ in0 ^ in4;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_50(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out2 = in2 ^ in7;
- tmp0 = in3 ^ in5;
- out0 = out2 ^ in4 ^ in6;
- out1 = tmp0 ^ in7;
- tmp1 = tmp0 ^ in6;
- out3 = out0 ^ in3;
- out7 = tmp1 ^ in1;
- tmp2 = tmp1 ^ in0;
- out5 = out3 ^ in1 ^ in2;
- out4 = tmp2 ^ in2;
- out6 = tmp2 ^ out3;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_51(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out2 = in7;
- out3 = in2 ^ in4 ^ in6 ^ in7;
- out0 = out3 ^ in0;
- out6 = out0 ^ in5;
- out4 = out6 ^ in3 ^ in7;
- out1 = out0 ^ out4 ^ in1;
- out7 = out1 ^ in6;
- out5 = out7 ^ in4;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_52(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out2 = in1 ^ in2;
- tmp0 = in2 ^ in4;
- tmp1 = in3 ^ in5;
- tmp2 = in3 ^ in6;
- tmp3 = in0 ^ in7;
- out0 = tmp0 ^ in6;
- out6 = tmp0 ^ tmp3;
- out7 = tmp1 ^ in1;
- out1 = tmp1 ^ tmp3;
- out3 = tmp2 ^ in4;
- out5 = tmp2 ^ in1 ^ in7;
- out4 = tmp2 ^ out1 ^ in2;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_53(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out2 = in1;
- out3 = in4 ^ in6;
- out0 = out3 ^ in0 ^ in2;
- out6 = out0 ^ in7;
- out4 = out6 ^ in5;
- out7 = out0 ^ out4 ^ in1 ^ in3;
- out1 = out7 ^ in0;
- out5 = out7 ^ in6;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_54(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out1 = in3 ^ in5;
- tmp0 = in1 ^ in3;
- tmp1 = in2 ^ in4;
- tmp2 = in0 ^ in7;
- out5 = in1 ^ in4 ^ in6;
- out4 = tmp2 ^ out1;
- out7 = tmp0 ^ in6;
- out3 = tmp0 ^ tmp1;
- out0 = tmp1 ^ in7;
- tmp3 = tmp2 ^ in2;
- out2 = tmp3 ^ in6;
- out6 = tmp3 ^ in5;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_55(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in1 ^ in3;
- tmp1 = in1 ^ in4;
- tmp2 = in6 ^ in7;
- out7 = tmp0 ^ tmp2;
- out1 = tmp0 ^ in5;
- out3 = tmp1 ^ in2;
- out5 = tmp1 ^ in5 ^ in6;
- out2 = tmp2 ^ in0;
- out4 = out5 ^ out7 ^ in0;
- out6 = out2 ^ in2 ^ in5;
- out0 = out5 ^ out6 ^ in1;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_56(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out0 = in2 ^ in4;
- tmp0 = in0 ^ in2;
- out4 = in0 ^ in5;
- out7 = in1 ^ in3;
- out5 = in1 ^ in6;
- out6 = tmp0 ^ in7;
- out2 = tmp0 ^ out5;
- out1 = out4 ^ in3;
- out3 = out7 ^ in4 ^ in7;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_57(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in0 ^ in5;
- tmp1 = in1 ^ in7;
- out0 = in0 ^ in2 ^ in4;
- out5 = in1 ^ in5 ^ in6;
- out4 = tmp0 ^ in4;
- out1 = tmp0 ^ in1 ^ in3;
- out2 = tmp0 ^ out5;
- out3 = tmp1 ^ in4;
- out7 = tmp1 ^ in3;
- out6 = tmp1 ^ out2 ^ in2;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_58(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out2 = in2 ^ in5;
- tmp0 = in2 ^ in3 ^ in4;
- out5 = tmp0 ^ in1;
- out6 = tmp0 ^ in0 ^ in5;
- out3 = out6 ^ in7;
- tmp1 = out2 ^ out5;
- out7 = tmp1 ^ in6;
- out4 = tmp1 ^ out3 ^ in3;
- out0 = out4 ^ out7 ^ in0;
- out1 = tmp0 ^ out0;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_59(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out2 = in5;
- tmp0 = in0 ^ in5 ^ in7;
- out3 = tmp0 ^ in2 ^ in4;
- out0 = out3 ^ in6;
- tmp1 = out0 ^ in7;
- out6 = tmp1 ^ in3;
- out5 = out6 ^ in0 ^ in1 ^ in6;
- out4 = tmp0 ^ out5;
- out1 = tmp1 ^ out4;
- out7 = out1 ^ in4;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_5A(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in1 ^ in2;
- tmp1 = in2 ^ in5;
- out5 = tmp0 ^ in3;
- out4 = tmp0 ^ in0;
- tmp2 = tmp1 ^ in4;
- out2 = tmp1 ^ in1 ^ in7;
- out7 = tmp2 ^ out5;
- out6 = out4 ^ out7 ^ in5;
- out0 = tmp2 ^ in6;
- out1 = out0 ^ out6 ^ in7;
- out3 = tmp1 ^ out6;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_5B(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in2 ^ in3;
- tmp1 = in0 ^ in4;
- tmp2 = in1 ^ in5;
- out5 = tmp0 ^ tmp2;
- tmp3 = tmp1 ^ in6;
- out3 = tmp1 ^ in5;
- out2 = tmp2 ^ in7;
- tmp4 = out3 ^ in2;
- out7 = out2 ^ in3 ^ in4;
- out0 = tmp4 ^ in6;
- out6 = tmp0 ^ tmp3;
- out4 = tmp2 ^ tmp4;
- out1 = tmp3 ^ out7;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_5C(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in3 ^ in6;
- tmp1 = in0 ^ in2 ^ in5;
- out1 = tmp0 ^ in5;
- tmp2 = tmp0 ^ in1;
- out2 = tmp1 ^ in6;
- out6 = tmp1 ^ in3;
- out4 = tmp2 ^ in0;
- out7 = tmp2 ^ in4;
- out3 = tmp1 ^ out7;
- out0 = out3 ^ out4 ^ in7;
- out5 = out0 ^ in1 ^ in5;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_5D(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in0 ^ in1;
- tmp1 = in0 ^ in6;
- out2 = tmp1 ^ in5;
- tmp2 = out2 ^ in3;
- out6 = tmp2 ^ in2;
- out1 = tmp0 ^ tmp2;
- tmp3 = out1 ^ in4 ^ in5;
- out4 = tmp3 ^ in0;
- out7 = tmp3 ^ in7;
- tmp4 = out4 ^ out6;
- out5 = tmp4 ^ in7;
- out0 = tmp0 ^ out5;
- out3 = tmp1 ^ tmp4;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_5E(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in2 ^ in5;
- tmp1 = in3 ^ in5;
- tmp2 = in1 ^ in7;
- out7 = in1 ^ in3 ^ in4;
- out0 = tmp0 ^ in4;
- tmp3 = tmp1 ^ in0;
- out5 = tmp2 ^ in2;
- out1 = tmp3 ^ in6;
- out6 = tmp0 ^ tmp3;
- tmp4 = tmp2 ^ out1;
- out3 = tmp4 ^ in4;
- out4 = tmp1 ^ tmp4;
- out2 = tmp0 ^ out4;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_5F(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in1 ^ in5;
- tmp1 = in0 ^ in6;
- tmp2 = tmp0 ^ in7;
- tmp3 = tmp1 ^ in3;
- out2 = tmp1 ^ tmp2;
- out5 = tmp2 ^ in2;
- out6 = tmp3 ^ in2;
- out3 = out2 ^ in4;
- out4 = out3 ^ in5;
- out1 = tmp0 ^ tmp3;
- out7 = tmp3 ^ out4;
- out0 = out4 ^ out5 ^ in6;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_60(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out4 = in2 ^ in5;
- tmp0 = in3 ^ in6;
- out1 = in3 ^ in4 ^ in7;
- out7 = out4 ^ in1;
- tmp1 = out4 ^ in4;
- out0 = tmp0 ^ in2;
- out5 = tmp0 ^ in0;
- out2 = tmp0 ^ tmp1;
- out3 = tmp1 ^ in7;
- out6 = out3 ^ out7 ^ in0;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_61(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in2 ^ in5;
- out4 = tmp0 ^ in4;
- tmp1 = out4 ^ in3;
- out3 = tmp1 ^ in7;
- out2 = tmp1 ^ in2 ^ in6;
- out1 = tmp0 ^ out3 ^ in1;
- out0 = out2 ^ out4 ^ in0;
- out7 = tmp1 ^ out1;
- out6 = out0 ^ out1 ^ in2;
- out5 = tmp0 ^ out0;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_62(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out3 = in4 ^ in5;
- tmp0 = in0 ^ in3 ^ in4;
- out1 = tmp0 ^ in7;
- out5 = tmp0 ^ in6;
- tmp1 = out1 ^ in0;
- tmp2 = tmp1 ^ out3;
- out4 = tmp2 ^ in2;
- tmp3 = tmp2 ^ in1;
- out0 = out4 ^ in5 ^ in6;
- out7 = tmp3 ^ out0;
- out6 = tmp0 ^ tmp3;
- out2 = tmp1 ^ out7;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_63(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in3 ^ in4;
- tmp1 = in1 ^ in7;
- out3 = tmp0 ^ in5;
- tmp2 = out3 ^ in6;
- out4 = out3 ^ in2 ^ in7;
- out5 = tmp2 ^ in0;
- tmp3 = out5 ^ in3;
- out0 = tmp3 ^ out4;
- out2 = tmp1 ^ tmp2;
- out6 = tmp1 ^ tmp3;
- tmp4 = tmp0 ^ out2;
- out1 = tmp4 ^ out5;
- out7 = tmp4 ^ in2;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_64(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out0 = in2 ^ in3;
- out1 = in3 ^ in4;
- out7 = in1 ^ in2;
- tmp0 = in4 ^ in5;
- tmp1 = in0 ^ in7;
- out4 = in5 ^ in6 ^ in7;
- out2 = tmp0 ^ out0 ^ in0;
- out3 = tmp0 ^ out7 ^ in6;
- out5 = tmp1 ^ in6;
- out6 = tmp1 ^ in1;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_65(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in0 ^ in3;
- tmp1 = in4 ^ in5;
- tmp2 = in6 ^ in7;
- out7 = in1 ^ in2 ^ in7;
- out1 = in1 ^ in3 ^ in4;
- out0 = tmp0 ^ in2;
- out2 = tmp0 ^ tmp1;
- out4 = tmp1 ^ tmp2;
- tmp3 = tmp2 ^ in0;
- out3 = out4 ^ out7 ^ in3;
- out5 = tmp3 ^ in5;
- out6 = tmp3 ^ in1;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_66(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in1 ^ in2;
- tmp1 = in2 ^ in3;
- tmp2 = in0 ^ in4;
- out7 = tmp0 ^ in6;
- out0 = tmp1 ^ in7;
- out1 = tmp2 ^ in3;
- tmp3 = tmp2 ^ in6;
- tmp4 = out1 ^ in5;
- out5 = tmp3 ^ in7;
- out4 = tmp3 ^ tmp4;
- out2 = tmp0 ^ tmp4 ^ in7;
- out6 = tmp1 ^ out2 ^ in4;
- out3 = tmp3 ^ out6;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_67(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in0 ^ in3;
- tmp1 = tmp0 ^ in1;
- tmp2 = tmp0 ^ in7;
- out1 = tmp1 ^ in4;
- out0 = tmp2 ^ in2;
- tmp3 = out1 ^ in7;
- out2 = tmp3 ^ in5;
- out3 = out2 ^ in0 ^ in6;
- out7 = tmp1 ^ out0 ^ in6;
- out5 = tmp1 ^ out3;
- out4 = tmp2 ^ out5;
- out6 = tmp3 ^ out4;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_68(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in3 ^ in4;
- tmp1 = in2 ^ in3 ^ in5;
- tmp2 = tmp0 ^ in1;
- tmp3 = tmp0 ^ in6;
- out0 = tmp1 ^ in6;
- out6 = tmp2 ^ in0;
- out7 = tmp1 ^ tmp2;
- out1 = tmp3 ^ in7;
- out2 = out1 ^ in2;
- out4 = tmp2 ^ out2;
- out3 = out4 ^ out6 ^ in3;
- out5 = tmp3 ^ out3;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_69(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in6 ^ in7;
- out2 = tmp0 ^ in3 ^ in4;
- out1 = out2 ^ in1;
- out3 = out2 ^ in0 ^ in2;
- out4 = out1 ^ in2 ^ in3;
- out6 = out1 ^ in0 ^ in7;
- out7 = out4 ^ in5 ^ in6;
- out5 = out4 ^ out6 ^ in5;
- out0 = tmp0 ^ out5;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_6A(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in2 ^ in6;
- out3 = in0 ^ in4 ^ in6;
- tmp1 = tmp0 ^ in3;
- out4 = tmp1 ^ in1;
- tmp2 = tmp1 ^ in7;
- out2 = out4 ^ in4;
- out0 = tmp2 ^ in5;
- out5 = tmp2 ^ out3;
- out7 = out2 ^ in3 ^ in5;
- out1 = tmp0 ^ out5;
- out6 = tmp1 ^ out7 ^ in0;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_6B(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in4 ^ in6;
- out2 = tmp0 ^ in1 ^ in3;
- out4 = out2 ^ in2;
- tmp1 = out2 ^ in0;
- out7 = out4 ^ in3 ^ in5 ^ in7;
- out1 = tmp1 ^ in7;
- out3 = tmp1 ^ in1;
- out6 = tmp1 ^ in5;
- out0 = tmp1 ^ out7 ^ in6;
- out5 = tmp0 ^ out0;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_6C(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out4 = in1;
- tmp0 = in2 ^ in3;
- out5 = in0 ^ in2;
- out1 = in3 ^ in4 ^ in6;
- tmp1 = out5 ^ in1;
- out0 = tmp0 ^ in5;
- out6 = tmp0 ^ tmp1;
- out3 = tmp1 ^ in4;
- out7 = out3 ^ in0;
- out2 = out6 ^ out7 ^ in7;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_6D(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out4 = in1 ^ in4;
- tmp0 = in0 ^ in2;
- tmp1 = out4 ^ in3;
- out7 = out4 ^ in2 ^ in7;
- out5 = tmp0 ^ in5;
- out3 = tmp0 ^ tmp1;
- out1 = tmp1 ^ in6;
- out0 = out5 ^ in3;
- out2 = out3 ^ out7 ^ in4;
- out6 = out1 ^ in0 ^ in4;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_6E(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in1 ^ in3;
- tmp1 = in0 ^ in4;
- out4 = tmp0 ^ in7;
- out6 = tmp0 ^ in0 ^ in5;
- out5 = tmp1 ^ in2;
- tmp2 = tmp1 ^ in3;
- out3 = tmp2 ^ out4;
- out1 = tmp2 ^ in6;
- out2 = tmp0 ^ out5;
- out0 = out2 ^ out3 ^ in5;
- out7 = out1 ^ out2 ^ in4;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_6F(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in3 ^ in7;
- tmp1 = tmp0 ^ in4;
- tmp2 = tmp0 ^ in0 ^ in2;
- out4 = tmp1 ^ in1;
- out0 = tmp2 ^ in5;
- out3 = out4 ^ in0;
- out2 = out3 ^ in7;
- out1 = out2 ^ in6;
- out6 = out1 ^ in4 ^ in5;
- out7 = tmp2 ^ out1;
- out5 = tmp1 ^ out0;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_70(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out3 = in2;
- tmp0 = in2 ^ in4;
- out2 = in2 ^ in3 ^ in5;
- tmp1 = tmp0 ^ in6;
- tmp2 = out2 ^ in7;
- out0 = tmp1 ^ in3;
- out4 = tmp1 ^ in0;
- out7 = tmp2 ^ in1;
- out6 = out4 ^ in1;
- out5 = out7 ^ in0 ^ in2;
- out1 = tmp0 ^ tmp2;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_71(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out2 = in3 ^ in5;
- out3 = in2 ^ in3;
- tmp0 = in0 ^ in2;
- tmp1 = out2 ^ in1;
- out4 = tmp0 ^ in6;
- tmp2 = tmp0 ^ in1;
- out7 = tmp1 ^ in2;
- out1 = tmp1 ^ in4 ^ in7;
- out0 = out4 ^ in3 ^ in4;
- out6 = tmp2 ^ in4;
- out5 = tmp2 ^ out3 ^ in7;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_72(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out3 = in7;
- tmp0 = in0 ^ in4;
- tmp1 = tmp0 ^ in3 ^ in7;
- out1 = tmp1 ^ in5;
- out5 = out1 ^ in1;
- tmp2 = tmp0 ^ out5;
- out2 = tmp2 ^ in2;
- out7 = out2 ^ in6;
- out6 = tmp1 ^ out7;
- out4 = tmp2 ^ out6;
- out0 = out4 ^ in0;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_73(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out3 = in3 ^ in7;
- out2 = out3 ^ in1 ^ in5;
- out1 = out2 ^ in0 ^ in4;
- out5 = out1 ^ in5;
- out6 = out1 ^ out3 ^ in2;
- out0 = out2 ^ out6 ^ in6;
- out7 = out0 ^ out1 ^ in3;
- out4 = out0 ^ in4;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_74(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in3 ^ in4;
- tmp1 = in1 ^ in2 ^ in6;
- out4 = in0 ^ in4 ^ in7;
- out5 = in0 ^ in1 ^ in5;
- out0 = tmp0 ^ in2;
- out1 = tmp0 ^ in5;
- out3 = tmp1 ^ in7;
- out6 = tmp1 ^ in0;
- out2 = tmp1 ^ out5 ^ in3;
- out7 = out3 ^ in3 ^ in6;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_75(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out4 = in0 ^ in7;
- tmp0 = in1 ^ in3;
- out5 = in0 ^ in1;
- out7 = tmp0 ^ in2;
- tmp1 = tmp0 ^ in4;
- out6 = out5 ^ in2;
- tmp2 = out7 ^ in6;
- out1 = tmp1 ^ in5;
- out0 = tmp1 ^ out6;
- out3 = tmp2 ^ in7;
- out2 = tmp2 ^ out6 ^ in5;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_76(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out3 = in1 ^ in6;
- tmp0 = in0 ^ in5;
- tmp1 = in3 ^ in7;
- tmp2 = tmp0 ^ in4;
- tmp3 = tmp1 ^ in2;
- out5 = tmp2 ^ in1;
- out1 = tmp2 ^ in3;
- out0 = tmp3 ^ in4;
- out4 = out1 ^ in5;
- out7 = tmp3 ^ out3;
- out2 = tmp0 ^ out7;
- out6 = tmp1 ^ out2;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_77(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out4 = in0 ^ in3;
- tmp0 = in1 ^ in4;
- tmp1 = in1 ^ in6;
- tmp2 = out4 ^ in5;
- out5 = tmp0 ^ in0;
- out1 = tmp0 ^ tmp2;
- out3 = tmp1 ^ in3;
- out2 = tmp1 ^ tmp2 ^ in7;
- out7 = out3 ^ in2;
- tmp3 = out7 ^ in6;
- out6 = tmp2 ^ tmp3;
- out0 = tmp3 ^ out5 ^ in7;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_78(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in0 ^ in3;
- tmp1 = in2 ^ in7;
- tmp2 = in0 ^ in5 ^ in6;
- out2 = tmp1 ^ in3;
- out3 = tmp2 ^ in2;
- out5 = out3 ^ in1 ^ in3;
- out0 = tmp0 ^ out3 ^ in4;
- out1 = tmp1 ^ out0;
- out4 = out1 ^ out5 ^ in5;
- out7 = tmp0 ^ out4;
- out6 = tmp2 ^ out7;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_79(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out2 = in3 ^ in7;
- tmp0 = in3 ^ in4;
- tmp1 = in1 ^ in5;
- tmp2 = tmp1 ^ in2;
- out4 = tmp2 ^ in0 ^ in7;
- tmp3 = out4 ^ in5;
- out5 = tmp3 ^ out2 ^ in6;
- out7 = tmp0 ^ tmp2;
- out6 = tmp0 ^ tmp3;
- out3 = tmp1 ^ out5;
- out0 = out3 ^ in4;
- out1 = tmp3 ^ out0;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_7A(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in1 ^ in2;
- out2 = tmp0 ^ in3;
- tmp1 = out2 ^ in4;
- out4 = tmp1 ^ in0 ^ in5;
- out5 = out4 ^ in6;
- out6 = out5 ^ in7;
- out7 = out6 ^ in0;
- out0 = out7 ^ in1;
- out1 = tmp0 ^ out6;
- out3 = tmp1 ^ out6;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_7B(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out2 = in1 ^ in3;
- tmp0 = in0 ^ in5;
- out4 = tmp0 ^ out2 ^ in2;
- tmp1 = out4 ^ in4;
- out6 = tmp1 ^ in7;
- out5 = tmp1 ^ in5 ^ in6;
- out0 = out6 ^ in1 ^ in6;
- tmp2 = out0 ^ in2;
- out1 = tmp2 ^ in1;
- out3 = tmp2 ^ in4;
- out7 = tmp0 ^ out5;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_7C(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in3 ^ in5;
- tmp1 = tmp0 ^ in4;
- out0 = tmp1 ^ in2;
- out1 = tmp1 ^ in6;
- out7 = out0 ^ in1 ^ in5 ^ in7;
- out5 = out1 ^ out7 ^ in0;
- out3 = out5 ^ in6;
- out6 = tmp0 ^ out5;
- out2 = out6 ^ in1;
- out4 = out2 ^ out7 ^ in5;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_7D(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in1 ^ in2;
- tmp1 = tmp0 ^ in3;
- tmp2 = tmp0 ^ in6;
- out7 = tmp1 ^ in4;
- tmp3 = tmp2 ^ in0;
- out5 = tmp3 ^ in7;
- out4 = tmp3 ^ in2 ^ in5;
- out2 = tmp1 ^ out5;
- out6 = tmp2 ^ out2;
- out0 = out4 ^ out7 ^ in6;
- out1 = tmp3 ^ out0;
- out3 = out6 ^ in5;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_7E(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in3 ^ in4;
- tmp1 = in0 ^ in5;
- out1 = tmp0 ^ tmp1 ^ in6;
- out3 = tmp1 ^ in1;
- out4 = out1 ^ in1 ^ in7;
- tmp2 = out4 ^ in3;
- out5 = tmp2 ^ in2;
- out6 = tmp0 ^ out5;
- out7 = tmp1 ^ out4 ^ in2;
- out2 = out6 ^ in5 ^ in7;
- out0 = tmp2 ^ out2;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_7F(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in2 ^ in7;
- tmp1 = tmp0 ^ in3 ^ in5;
- tmp2 = tmp1 ^ in0;
- out0 = tmp2 ^ in4;
- out6 = tmp2 ^ in1;
- out3 = tmp0 ^ out6;
- tmp3 = out3 ^ in6;
- out1 = tmp3 ^ in4;
- out2 = tmp3 ^ in5;
- out4 = tmp3 ^ in7;
- out5 = tmp1 ^ out1;
- out7 = out0 ^ out4 ^ in3;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_80(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in2 ^ in3;
- tmp1 = in4 ^ in5;
- out1 = in2 ^ in6 ^ in7;
- out5 = tmp0 ^ in4;
- tmp2 = tmp0 ^ in1;
- out6 = tmp1 ^ in3;
- out7 = tmp1 ^ in0 ^ in6;
- out4 = tmp2 ^ in7;
- out3 = tmp2 ^ out6;
- out2 = out3 ^ out5 ^ in6;
- out0 = out2 ^ in3 ^ in7;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_81(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in4 ^ in6;
- tmp1 = tmp0 ^ in3;
- out6 = tmp1 ^ in5;
- out5 = out6 ^ in2 ^ in6;
- out3 = out5 ^ in1;
- out2 = tmp0 ^ out3;
- out1 = out3 ^ out6 ^ in7;
- out4 = tmp1 ^ out1;
- out7 = out2 ^ out4 ^ in0;
- out0 = out7 ^ in1 ^ in4;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_82(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out4 = in1 ^ in2;
- tmp0 = in6 ^ in7;
- out5 = in2 ^ in3;
- out6 = in3 ^ in4;
- out7 = in0 ^ in4 ^ in5;
- out0 = in1 ^ in5 ^ in6;
- out1 = tmp0 ^ in0 ^ in2;
- out2 = tmp0 ^ in3 ^ in5;
- out3 = tmp0 ^ out0 ^ in4;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_83(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in0 ^ in1;
- tmp1 = in2 ^ in5;
- tmp2 = in3 ^ in6;
- out4 = in1 ^ in2 ^ in4;
- out0 = tmp0 ^ in5 ^ in6;
- out5 = tmp1 ^ in3;
- tmp3 = tmp1 ^ in7;
- out6 = tmp2 ^ in4;
- out2 = tmp2 ^ tmp3;
- tmp4 = tmp3 ^ out4;
- out1 = tmp3 ^ out0;
- out3 = tmp4 ^ in3;
- out7 = tmp0 ^ tmp4;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_84(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out1 = in2 ^ in6;
- out6 = in3 ^ in5;
- out0 = in1 ^ in5 ^ in7;
- out7 = in0 ^ in4 ^ in6;
- out4 = in1 ^ in3 ^ in6;
- out5 = in2 ^ in4 ^ in7;
- out2 = out6 ^ in0 ^ in1;
- out3 = out5 ^ in5 ^ in6;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_85(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in1 ^ in6;
- tmp1 = in3 ^ in6;
- tmp2 = tmp0 ^ in4;
- out1 = tmp0 ^ in2;
- out6 = tmp1 ^ in5;
- out4 = tmp2 ^ in3;
- tmp3 = out1 ^ out6;
- out2 = tmp3 ^ in0;
- out3 = tmp2 ^ tmp3 ^ in7;
- out7 = out2 ^ out3 ^ in1;
- out5 = tmp1 ^ out3;
- out0 = tmp2 ^ out7 ^ in5;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_86(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out6 = in3;
- out7 = in0 ^ in4;
- out0 = in1 ^ in5;
- out5 = in2 ^ in7;
- out3 = in4 ^ in5 ^ in6;
- out1 = in0 ^ in2 ^ in6;
- out4 = in1 ^ in6 ^ in7;
- out2 = in0 ^ in3 ^ in5 ^ in7;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_87(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out6 = in3 ^ in6;
- tmp0 = in0 ^ in1;
- out7 = in0 ^ in4 ^ in7;
- out5 = in2 ^ in5 ^ in7;
- out3 = out6 ^ in4 ^ in5;
- out0 = tmp0 ^ in5;
- tmp1 = tmp0 ^ in6;
- out2 = out5 ^ in0 ^ in3;
- out1 = tmp1 ^ in2;
- out4 = tmp1 ^ out7;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_88(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out1 = in2 ^ in7;
- tmp0 = in5 ^ in6;
- out0 = in1 ^ in6 ^ in7;
- out6 = in4 ^ in5 ^ in7;
- out3 = out0 ^ out1 ^ in0 ^ in4;
- out7 = tmp0 ^ in0;
- tmp1 = tmp0 ^ in3;
- out2 = out0 ^ in3;
- out4 = tmp1 ^ in2;
- out5 = tmp1 ^ out6;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_89(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in0 ^ in7;
- tmp1 = in2 ^ in7;
- tmp2 = tmp0 ^ in6;
- out1 = tmp1 ^ in1;
- out7 = tmp2 ^ in5;
- out0 = tmp2 ^ in1;
- out2 = out1 ^ in3 ^ in6;
- out6 = out7 ^ in0 ^ in4;
- out5 = out6 ^ in3;
- out3 = tmp0 ^ out2 ^ in4;
- out4 = tmp1 ^ out5;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_8A(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out0 = in1 ^ in6;
- out7 = in0 ^ in5;
- out2 = in3 ^ in6;
- out6 = in4 ^ in7;
- out1 = in0 ^ in2 ^ in7;
- out3 = out0 ^ out6 ^ in0;
- out4 = out1 ^ out7 ^ in6;
- out5 = out2 ^ in7;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_8B(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in0 ^ in1;
- tmp1 = in3 ^ in6;
- tmp2 = in5 ^ in7;
- tmp3 = tmp0 ^ in7;
- out0 = tmp0 ^ in6;
- out2 = tmp1 ^ in2;
- out5 = tmp1 ^ tmp2;
- out7 = tmp2 ^ in0;
- tmp4 = tmp3 ^ in4;
- out1 = tmp3 ^ in2;
- out6 = tmp4 ^ out0;
- out4 = out6 ^ in2 ^ in5;
- out3 = tmp1 ^ tmp4;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_8C(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out1 = in2;
- out0 = in1 ^ in7;
- out7 = in0 ^ in6;
- out5 = in4 ^ in6;
- out6 = in5 ^ in7;
- out2 = out0 ^ in0 ^ in3;
- out3 = out5 ^ out7 ^ in2 ^ in7;
- out4 = out6 ^ in3;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_8D(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out1 = in1 ^ in2;
- tmp0 = in6 ^ in7;
- out0 = in0 ^ in1 ^ in7;
- out5 = in4 ^ in5 ^ in6;
- out6 = tmp0 ^ in5;
- out7 = tmp0 ^ in0;
- out4 = tmp0 ^ out5 ^ in3;
- out2 = out0 ^ in2 ^ in3;
- out3 = out2 ^ in1 ^ in4;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_8E(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out0 = in1;
- out4 = in5;
- out7 = in0;
- out5 = in6;
- out6 = in7;
- out3 = in0 ^ in4;
- out1 = in0 ^ in2;
- out2 = in0 ^ in3;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_8F(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out0 = in0 ^ in1;
- tmp0 = in0 ^ in3;
- out4 = in4 ^ in5;
- out7 = in0 ^ in7;
- out5 = in5 ^ in6;
- out6 = in6 ^ in7;
- out1 = out0 ^ in2;
- out2 = tmp0 ^ in2;
- out3 = tmp0 ^ in4;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_90(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in1 ^ in2;
- tmp1 = in2 ^ in6 ^ in7;
- out3 = tmp0 ^ in7;
- out1 = tmp1 ^ in5;
- tmp2 = out1 ^ in4;
- out6 = tmp2 ^ in3;
- out5 = out6 ^ in1;
- out4 = out5 ^ in0;
- out0 = tmp0 ^ tmp2;
- out7 = tmp0 ^ out4;
- out2 = tmp1 ^ out5;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_91(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in2 ^ in4;
- tmp1 = tmp0 ^ in3 ^ in5;
- out2 = tmp1 ^ in1;
- out6 = tmp1 ^ in7;
- tmp2 = out2 ^ in5 ^ in7;
- out3 = tmp2 ^ in4;
- out5 = tmp2 ^ in6;
- out1 = tmp1 ^ out5 ^ in2;
- tmp3 = out1 ^ in0;
- out4 = tmp3 ^ in3;
- out0 = tmp0 ^ tmp3;
- out7 = tmp2 ^ tmp3;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_92(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out3 = in1;
- tmp0 = in4 ^ in5;
- tmp1 = tmp0 ^ in1;
- out2 = tmp0 ^ in3 ^ in7;
- out0 = tmp1 ^ in6;
- out7 = out2 ^ in0;
- out4 = out0 ^ in0 ^ in2;
- out5 = out4 ^ out7 ^ in5;
- out6 = tmp1 ^ out5;
- out1 = out6 ^ out7 ^ in7;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_93(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out3 = in1 ^ in3;
- tmp0 = in2 ^ in7;
- tmp1 = out3 ^ in6;
- tmp2 = tmp0 ^ in4;
- out5 = tmp0 ^ tmp1;
- out6 = tmp2 ^ in3;
- out2 = out6 ^ in5;
- out0 = out2 ^ out5 ^ in0;
- out7 = tmp1 ^ out0;
- out1 = tmp2 ^ out0;
- out4 = out1 ^ in7;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_94(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out3 = in2 ^ in6;
- tmp0 = in1 ^ in4 ^ in5;
- out1 = out3 ^ in5;
- out5 = tmp0 ^ out3;
- out0 = tmp0 ^ in7;
- out4 = tmp0 ^ in0 ^ in3;
- out6 = out1 ^ in3 ^ in7;
- out2 = out4 ^ in6;
- out7 = out0 ^ out2 ^ in4;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_95(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in2 ^ in3;
- out3 = tmp0 ^ in6;
- tmp1 = tmp0 ^ in7;
- tmp2 = out3 ^ in0;
- out6 = tmp1 ^ in5;
- tmp3 = tmp2 ^ in4;
- out7 = tmp3 ^ in2;
- tmp4 = tmp3 ^ in5;
- out2 = tmp4 ^ in1;
- tmp5 = out2 ^ in6;
- out0 = tmp1 ^ tmp5;
- out1 = tmp5 ^ out7;
- out4 = tmp2 ^ out1;
- out5 = tmp4 ^ out4;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_96(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out3 = in6 ^ in7;
- tmp0 = in1 ^ in5;
- tmp1 = in5 ^ in6;
- out6 = out3 ^ in2 ^ in3;
- out0 = tmp0 ^ in4;
- tmp2 = tmp1 ^ in2;
- out4 = out0 ^ in0 ^ in7;
- out1 = tmp2 ^ in0;
- out5 = tmp2 ^ in1;
- out7 = tmp0 ^ out4 ^ in3;
- out2 = tmp1 ^ out7;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_97(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in0 ^ in4;
- tmp1 = in2 ^ in6;
- out3 = in3 ^ in6 ^ in7;
- out7 = tmp0 ^ in3;
- tmp2 = tmp0 ^ in5;
- out5 = tmp1 ^ in1;
- out6 = tmp1 ^ out3;
- out0 = tmp2 ^ in1;
- out2 = tmp2 ^ out3 ^ in2;
- tmp3 = out0 ^ in4;
- out4 = tmp3 ^ in7;
- out1 = tmp1 ^ tmp3;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_98(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in5 ^ in7;
- tmp1 = in1 ^ in4 ^ in7;
- out1 = tmp0 ^ in2;
- out0 = tmp1 ^ in6;
- out2 = tmp1 ^ in3;
- out6 = out0 ^ out1 ^ in1;
- out5 = tmp0 ^ out2;
- out3 = tmp1 ^ out6 ^ in0;
- out7 = out0 ^ out5 ^ in0;
- out4 = out6 ^ out7 ^ in7;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_99(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in0 ^ in3;
- out5 = in1 ^ in3 ^ in4;
- out6 = in2 ^ in4 ^ in5;
- out4 = tmp0 ^ in2;
- tmp1 = tmp0 ^ in6;
- tmp2 = out5 ^ in7;
- out7 = tmp1 ^ in5;
- out0 = tmp1 ^ tmp2;
- out2 = tmp2 ^ in2;
- out3 = out0 ^ out6 ^ in3;
- out1 = tmp1 ^ out3;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_9A(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out2 = in3 ^ in4;
- tmp0 = in0 ^ in5;
- tmp1 = in1 ^ in6;
- out5 = in1 ^ in3 ^ in5;
- tmp2 = tmp0 ^ in7;
- out3 = tmp0 ^ tmp1;
- out0 = tmp1 ^ in4;
- out7 = tmp2 ^ in3;
- out1 = tmp2 ^ in2;
- out6 = out0 ^ in1 ^ in2;
- out4 = out1 ^ in4 ^ in5;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_9B(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out5 = in1 ^ in3;
- tmp0 = in3 ^ in5;
- out6 = in2 ^ in4;
- out4 = in0 ^ in2 ^ in7;
- out7 = tmp0 ^ in0;
- out2 = out6 ^ in3;
- out1 = out4 ^ in1 ^ in5;
- out3 = out7 ^ in1 ^ in6;
- out0 = tmp0 ^ out3 ^ in4;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_9C(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out1 = in2 ^ in5;
- tmp0 = in0 ^ in3 ^ in6;
- out3 = out1 ^ in0;
- out6 = out1 ^ in6;
- out7 = tmp0 ^ in7;
- out4 = out7 ^ in4;
- out2 = out4 ^ in1;
- out0 = tmp0 ^ out2;
- out5 = out0 ^ in5;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_9D(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out6 = in2 ^ in5;
- tmp0 = in0 ^ in3;
- out5 = in1 ^ in4 ^ in7;
- out1 = out6 ^ in1;
- out3 = tmp0 ^ out6;
- out7 = tmp0 ^ in6;
- out0 = out5 ^ in0;
- out4 = out7 ^ in7;
- out2 = out5 ^ out7 ^ in2;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_9E(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out0 = in1 ^ in4;
- tmp0 = in0 ^ in5;
- out6 = in2 ^ in6;
- out7 = in0 ^ in3 ^ in7;
- out4 = in0 ^ in4 ^ in6;
- out5 = in1 ^ in5 ^ in7;
- out1 = tmp0 ^ in2;
- out3 = tmp0 ^ in7;
- out2 = out4 ^ in3;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_9F(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out6 = in2;
- out7 = in0 ^ in3;
- tmp0 = in0 ^ in1;
- out4 = in0 ^ in6;
- out5 = in1 ^ in7;
- out1 = tmp0 ^ in2 ^ in5;
- out2 = out7 ^ in2 ^ in4 ^ in6;
- out3 = out7 ^ in5 ^ in7;
- out0 = tmp0 ^ in4;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_A0(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in1 ^ in6;
- out2 = tmp0 ^ in7;
- tmp1 = tmp0 ^ in5;
- out6 = out2 ^ in3 ^ in4;
- out0 = tmp1 ^ in3;
- tmp2 = out0 ^ in2;
- out3 = tmp2 ^ in7;
- tmp3 = tmp2 ^ in1;
- out5 = tmp3 ^ in0;
- out4 = tmp3 ^ out6;
- out7 = out5 ^ out6 ^ in1;
- out1 = tmp1 ^ out4;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_A1(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in2 ^ in5;
- tmp1 = tmp0 ^ in1;
- tmp2 = tmp0 ^ in4;
- out4 = tmp1 ^ in7;
- out7 = tmp2 ^ in0;
- out6 = tmp2 ^ out4 ^ in3;
- out3 = out4 ^ in6;
- out2 = out3 ^ in5;
- out1 = out2 ^ in4;
- out5 = out1 ^ out6 ^ in0;
- out0 = tmp1 ^ out5;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_A2(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out2 = in6;
- tmp0 = in1 ^ in3 ^ in5;
- out3 = tmp0 ^ in6;
- out4 = tmp0 ^ in2 ^ in4;
- out0 = out3 ^ in7;
- out6 = out0 ^ in4;
- out1 = out0 ^ out4 ^ in0;
- out7 = out1 ^ in5;
- out5 = out7 ^ in3 ^ in7;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_A3(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out2 = in2 ^ in6;
- out3 = in1 ^ in5 ^ in6;
- tmp0 = out2 ^ in0;
- out4 = out2 ^ out3 ^ in3;
- tmp1 = tmp0 ^ in4;
- out0 = tmp0 ^ out4 ^ in7;
- out5 = tmp1 ^ in3;
- out7 = tmp1 ^ in5;
- out1 = tmp1 ^ in1 ^ in7;
- out6 = tmp1 ^ out0 ^ in2;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_A4(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in1 ^ in3;
- tmp1 = in2 ^ in4;
- tmp2 = in2 ^ in5;
- tmp3 = in0 ^ in7;
- out0 = tmp0 ^ in5;
- out6 = tmp0 ^ in6 ^ in7;
- out1 = tmp1 ^ in6;
- out7 = tmp1 ^ tmp3;
- out3 = tmp2 ^ in3;
- tmp4 = tmp2 ^ out1;
- out2 = tmp3 ^ in1;
- out5 = tmp4 ^ out7;
- out4 = tmp4 ^ in1;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_A5(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out3 = in2 ^ in5;
- tmp0 = in1 ^ in6;
- tmp1 = in0 ^ in1;
- tmp2 = in2 ^ in4;
- out6 = in1 ^ in3 ^ in7;
- out4 = tmp0 ^ in5;
- out1 = tmp0 ^ tmp2;
- out0 = tmp1 ^ in3 ^ in5;
- out2 = tmp1 ^ in2 ^ in7;
- out7 = tmp2 ^ in0;
- out5 = tmp0 ^ out2;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_A6(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out2 = in0;
- out3 = in3 ^ in5 ^ in7;
- out1 = in0 ^ in2 ^ in4 ^ in6;
- out0 = out3 ^ in1;
- out7 = out1 ^ in7;
- out6 = out0 ^ in6;
- out5 = out7 ^ in5;
- out4 = out6 ^ in4;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_A7(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out2 = in0 ^ in2;
- out3 = in5 ^ in7;
- out7 = out2 ^ in4 ^ in6;
- out6 = out3 ^ in1 ^ in3;
- out1 = out7 ^ in1;
- out5 = out7 ^ in7;
- out0 = out6 ^ in0;
- out4 = out6 ^ in6;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_A8(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in2 ^ in4;
- tmp1 = in1 ^ in6;
- tmp2 = in0 ^ in2 ^ in7;
- out1 = tmp0 ^ in7;
- out4 = tmp0 ^ in6;
- out0 = tmp1 ^ in3;
- out2 = tmp1 ^ in5;
- out6 = tmp1 ^ in4;
- out7 = tmp2 ^ in5;
- out3 = tmp2 ^ out0 ^ in6;
- out5 = out7 ^ in2 ^ in3;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_A9(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out4 = in2 ^ in6;
- out6 = in1 ^ in4;
- out7 = in0 ^ in2 ^ in5;
- out5 = in0 ^ in3 ^ in7;
- out2 = out4 ^ in1 ^ in5;
- out1 = out6 ^ in2 ^ in7;
- out0 = out2 ^ out7 ^ in3;
- out3 = out1 ^ in0 ^ in4;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_AA(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in0 ^ in2;
- tmp1 = in1 ^ in3;
- tmp2 = in6 ^ in7;
- out1 = tmp0 ^ in4 ^ in7;
- out3 = tmp1 ^ in0;
- out0 = tmp1 ^ tmp2;
- out2 = tmp2 ^ in5;
- out7 = tmp0 ^ out2;
- out6 = out1 ^ out7 ^ in1;
- out5 = out0 ^ out6 ^ in0;
- out4 = out5 ^ out7 ^ in7;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_AB(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out3 = in0 ^ in1;
- tmp0 = in1 ^ in4;
- tmp1 = in0 ^ in7;
- out6 = tmp0 ^ in5;
- out1 = tmp0 ^ tmp1 ^ in2;
- out5 = tmp1 ^ in3 ^ in4;
- out0 = tmp0 ^ out5 ^ in6;
- out4 = out0 ^ out3 ^ in2;
- out2 = out4 ^ in3 ^ in5;
- out7 = tmp1 ^ out2;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_AC(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out0 = in1 ^ in3;
- out1 = in2 ^ in4;
- tmp0 = in0 ^ in2;
- out4 = in4 ^ in7;
- out5 = in0 ^ in5;
- out6 = in1 ^ in6;
- out7 = tmp0 ^ in7;
- out3 = tmp0 ^ in3 ^ in6;
- out2 = out5 ^ in1;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_AD(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out4 = in7;
- out5 = in0;
- out6 = in1;
- out7 = in0 ^ in2;
- out0 = in0 ^ in1 ^ in3;
- out2 = out7 ^ in1 ^ in5;
- out1 = in1 ^ in2 ^ in4;
- out3 = out7 ^ in6;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_AE(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out4 = in3 ^ in4;
- tmp0 = in0 ^ in4;
- tmp1 = in0 ^ in7;
- out0 = in1 ^ in3 ^ in7;
- out1 = tmp0 ^ in2;
- out5 = tmp0 ^ in5;
- tmp2 = tmp1 ^ in6;
- out2 = tmp1 ^ in5;
- out3 = tmp2 ^ in3;
- out7 = tmp2 ^ in2;
- out6 = tmp2 ^ out2 ^ in1;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_AF(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out4 = in3;
- tmp0 = in0 ^ in7;
- out5 = in0 ^ in4;
- out6 = in1 ^ in5;
- out7 = in0 ^ in2 ^ in6;
- out0 = tmp0 ^ in1 ^ in3;
- out3 = tmp0 ^ in6;
- out2 = tmp0 ^ in2 ^ in5;
- out1 = out5 ^ in1 ^ in2;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_B0(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in1 ^ in4;
- tmp1 = in3 ^ in6;
- out2 = tmp0 ^ in7;
- tmp2 = tmp0 ^ tmp1;
- out0 = tmp2 ^ in5;
- out3 = tmp2 ^ in2;
- out6 = out3 ^ in6;
- tmp3 = out6 ^ in0 ^ in1;
- out7 = tmp3 ^ in5;
- out5 = tmp3 ^ out2;
- out1 = out0 ^ out5 ^ in0;
- out4 = tmp1 ^ out5;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_B1(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in1 ^ in4;
- out2 = tmp0 ^ in2 ^ in7;
- tmp1 = out2 ^ in6;
- out1 = tmp1 ^ in5;
- out3 = tmp1 ^ in7;
- out4 = tmp1 ^ in0;
- out6 = out3 ^ in3;
- out0 = out6 ^ in0 ^ in2 ^ in5;
- out5 = tmp1 ^ out0 ^ in1;
- out7 = tmp0 ^ out5;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_B2(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out2 = in4;
- tmp0 = in4 ^ in7;
- tmp1 = in1 ^ in3 ^ in6;
- out3 = tmp0 ^ tmp1;
- tmp2 = tmp1 ^ in0;
- out0 = out3 ^ in5;
- out4 = tmp2 ^ in2;
- tmp3 = out4 ^ in6;
- out5 = tmp0 ^ tmp3;
- out1 = tmp3 ^ out0;
- tmp4 = out1 ^ in7;
- out7 = tmp4 ^ in3;
- out6 = tmp2 ^ tmp4;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_B3(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out2 = in2 ^ in4;
- tmp0 = in0 ^ in5;
- tmp1 = in1 ^ in6;
- out3 = tmp1 ^ in4 ^ in7;
- tmp2 = tmp0 ^ out3;
- out0 = tmp2 ^ in3;
- out1 = tmp2 ^ in2;
- out5 = out0 ^ in2 ^ in6;
- out7 = tmp1 ^ out5;
- out4 = out7 ^ in1 ^ in5 ^ in7;
- out6 = tmp0 ^ out4;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_B4(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out4 = in0 ^ in1;
- out5 = out4 ^ in2;
- tmp0 = out4 ^ in4;
- out6 = out5 ^ in0 ^ in3;
- out7 = tmp0 ^ out6;
- out2 = tmp0 ^ in6 ^ in7;
- out3 = out7 ^ in0 ^ in7;
- out0 = out5 ^ out7 ^ in5;
- out1 = out0 ^ out6 ^ in6;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_B5(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in0 ^ in1;
- tmp1 = in2 ^ in4;
- out4 = tmp0 ^ in4;
- out3 = tmp1 ^ in7;
- tmp2 = out4 ^ in5;
- out7 = out3 ^ in0 ^ in3;
- out0 = tmp2 ^ in3;
- out2 = tmp0 ^ out3 ^ in6;
- out5 = tmp1 ^ tmp2;
- out6 = out2 ^ out7 ^ in2;
- out1 = tmp0 ^ out0 ^ out6;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_B6(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out3 = in3 ^ in4;
- tmp0 = in1 ^ in2;
- tmp1 = in0 ^ in4;
- tmp2 = in3 ^ in5;
- tmp3 = out3 ^ in1 ^ in7;
- out5 = tmp0 ^ tmp1;
- out6 = tmp0 ^ tmp2;
- out2 = tmp1 ^ in6;
- out4 = tmp1 ^ tmp3;
- out0 = tmp3 ^ in5;
- out1 = out2 ^ in2 ^ in5;
- out7 = tmp2 ^ out1;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_B7(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out3 = in4;
- tmp0 = in0 ^ in4;
- out2 = tmp0 ^ in2 ^ in6;
- tmp1 = out2 ^ in7;
- out1 = out2 ^ in1 ^ in5;
- out7 = tmp1 ^ in3;
- out5 = out1 ^ in6;
- out6 = tmp0 ^ out1 ^ in3;
- out0 = tmp1 ^ out6;
- out4 = out0 ^ in5;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_B8(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in1 ^ in4;
- tmp1 = in2 ^ in5;
- out2 = tmp0 ^ in5;
- out4 = tmp1 ^ in0;
- tmp2 = tmp1 ^ in7;
- out6 = tmp2 ^ out2;
- out7 = out4 ^ in3;
- out1 = tmp2 ^ in4;
- out3 = tmp0 ^ out7;
- out0 = out3 ^ out4 ^ in6;
- out5 = out0 ^ in0 ^ in4;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_B9(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in0 ^ in2;
- tmp1 = in4 ^ in5;
- out4 = tmp0 ^ tmp1;
- tmp2 = tmp0 ^ in3 ^ in7;
- out3 = out4 ^ in1;
- out7 = tmp2 ^ in5;
- out2 = out3 ^ in0;
- out1 = out2 ^ in7;
- out6 = out1 ^ in5 ^ in6;
- out0 = tmp2 ^ out6;
- out5 = tmp1 ^ out0;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_BA(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in5 ^ in7;
- out2 = tmp0 ^ in4;
- tmp1 = out2 ^ in2;
- out1 = tmp1 ^ in0;
- out6 = tmp1 ^ in1;
- out4 = out1 ^ in3 ^ in4;
- tmp2 = out4 ^ out6;
- out7 = out4 ^ in6 ^ in7;
- out5 = tmp2 ^ in6;
- out3 = tmp0 ^ tmp2;
- out0 = out6 ^ out7 ^ in0;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_BB(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out2 = in2 ^ in4 ^ in5 ^ in7;
- tmp0 = out2 ^ in1;
- out4 = out2 ^ in0 ^ in3;
- out1 = tmp0 ^ in0;
- out6 = tmp0 ^ in6;
- out3 = out1 ^ in2;
- tmp1 = out4 ^ out6 ^ in4;
- out0 = tmp1 ^ in7;
- out5 = tmp1 ^ in5;
- out7 = tmp0 ^ tmp1;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_BC(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in0 ^ in2;
- tmp1 = in2 ^ in4;
- out0 = in1 ^ in3 ^ in4;
- out6 = in1 ^ in2 ^ in7;
- out7 = tmp0 ^ in3;
- out5 = tmp0 ^ out6 ^ in6;
- out1 = tmp1 ^ in5;
- tmp2 = out1 ^ out5 ^ in1;
- out3 = tmp2 ^ in3;
- out4 = tmp1 ^ tmp2;
- out2 = tmp2 ^ out6;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_BD(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in0 ^ in3;
- tmp1 = in1 ^ in4;
- out0 = tmp0 ^ tmp1;
- out7 = tmp0 ^ in2 ^ in7;
- out1 = tmp1 ^ in2 ^ in5;
- tmp2 = out1 ^ in0;
- out2 = tmp2 ^ in6;
- out3 = out2 ^ in1 ^ in7;
- out4 = out3 ^ in2;
- out5 = tmp1 ^ out4;
- out6 = tmp2 ^ out4;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_BE(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in0 ^ in3 ^ in6;
- out4 = tmp0 ^ in5;
- out7 = tmp0 ^ in2;
- out3 = out4 ^ in4;
- out1 = out3 ^ out7 ^ in0;
- out2 = out3 ^ in3 ^ in7;
- out0 = out2 ^ out4 ^ in1;
- out5 = tmp0 ^ out0;
- out6 = out1 ^ out5 ^ in6;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_BF(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in0 ^ in4;
- out3 = tmp0 ^ in5 ^ in6;
- out4 = out3 ^ in3;
- tmp1 = out3 ^ in7;
- out2 = tmp1 ^ in2;
- out5 = tmp1 ^ in1;
- tmp2 = out2 ^ in5;
- out7 = tmp2 ^ in3 ^ in4;
- tmp3 = tmp0 ^ out5;
- out0 = tmp3 ^ out4;
- out1 = tmp2 ^ tmp3;
- out6 = tmp3 ^ in2;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_C0(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out5 = in2 ^ in5;
- tmp0 = in1 ^ in4;
- tmp1 = in3 ^ in6;
- out0 = out5 ^ in1;
- out4 = tmp0 ^ in7;
- out3 = tmp0 ^ tmp1;
- out1 = tmp1 ^ in2;
- out6 = tmp1 ^ in0;
- out7 = out4 ^ in0;
- out2 = out4 ^ out5 ^ in3;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_C1(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out5 = in2;
- tmp0 = in0 ^ in1;
- out4 = in1 ^ in7;
- out6 = in0 ^ in3;
- out3 = in1 ^ in4 ^ in6;
- tmp1 = tmp0 ^ in2;
- out7 = tmp0 ^ in4;
- out0 = tmp1 ^ in5;
- out1 = tmp1 ^ out6 ^ in6;
- out2 = out6 ^ out7 ^ in5 ^ in7;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_C2(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out4 = in1 ^ in3 ^ in4;
- tmp0 = in0 ^ in3 ^ in6;
- out5 = in2 ^ in4 ^ in5;
- tmp1 = out4 ^ in7;
- out1 = tmp0 ^ in2;
- out6 = tmp0 ^ in5;
- out2 = out5 ^ in3;
- out7 = tmp0 ^ tmp1;
- out3 = tmp1 ^ in2 ^ in6;
- out0 = tmp1 ^ out2;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_C3(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out4 = in1 ^ in3;
- tmp0 = in0 ^ in2;
- tmp1 = in3 ^ in5;
- out5 = in2 ^ in4;
- tmp2 = tmp0 ^ out4;
- out2 = tmp1 ^ in4;
- out6 = tmp1 ^ in0;
- out0 = tmp1 ^ tmp2 ^ in7;
- out1 = tmp2 ^ in6;
- out7 = out1 ^ out5 ^ in3;
- out3 = tmp0 ^ out7 ^ in7;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_C4(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in3 ^ in7;
- out3 = tmp0 ^ in4;
- tmp1 = tmp0 ^ in2;
- out1 = tmp1 ^ in6;
- out5 = tmp1 ^ in5;
- out4 = out1 ^ out3 ^ in1;
- out0 = out4 ^ in4 ^ in5;
- out2 = out0 ^ out3 ^ in0;
- out7 = out1 ^ out2 ^ in7;
- out6 = tmp1 ^ out0 ^ out7;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_C5(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out3 = in4 ^ in7;
- tmp0 = in3 ^ in7;
- out4 = in1 ^ in2 ^ in6;
- out6 = in0 ^ in3 ^ in4;
- out5 = tmp0 ^ in2;
- out1 = tmp0 ^ out4;
- out0 = out4 ^ in0 ^ in5;
- out2 = out0 ^ out5 ^ in4;
- out7 = tmp0 ^ out2 ^ in6;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_C6(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in5 ^ in6;
- tmp1 = in1 ^ in7;
- tmp2 = tmp0 ^ in0;
- tmp3 = tmp0 ^ tmp1;
- tmp4 = tmp2 ^ in4;
- out0 = tmp3 ^ in2;
- out6 = tmp4 ^ in3;
- out2 = out6 ^ in2;
- out7 = tmp1 ^ tmp4;
- out3 = tmp2 ^ out2;
- tmp5 = out3 ^ in5;
- out5 = tmp5 ^ in7;
- out4 = tmp3 ^ tmp5;
- out1 = tmp4 ^ out5;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_C7(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out3 = in2 ^ in4;
- tmp0 = in3 ^ in5;
- tmp1 = out3 ^ in7;
- out6 = tmp0 ^ in0 ^ in4;
- out5 = tmp1 ^ in3;
- out2 = out6 ^ in6;
- out7 = out2 ^ in1 ^ in3;
- out0 = tmp1 ^ out7;
- out1 = tmp0 ^ out0;
- out4 = out1 ^ in0;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_C8(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out0 = in1 ^ in2;
- out1 = in2 ^ in3;
- tmp0 = in5 ^ in6;
- tmp1 = in0 ^ in7;
- out2 = out1 ^ in1 ^ in4;
- out4 = tmp0 ^ in4;
- out5 = tmp0 ^ in7;
- out6 = tmp1 ^ in6;
- out7 = tmp1 ^ in1;
- out3 = out2 ^ in0 ^ in2 ^ in5;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_C9(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out4 = in5 ^ in6;
- out7 = in0 ^ in1;
- tmp0 = in1 ^ in3;
- out5 = in6 ^ in7;
- out6 = in0 ^ in7;
- out0 = out7 ^ in2;
- out3 = out7 ^ in4 ^ in5;
- out1 = tmp0 ^ in2;
- out2 = tmp0 ^ in4;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_CA(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in0 ^ in7;
- tmp1 = in2 ^ in7;
- tmp2 = tmp0 ^ in6;
- out0 = tmp1 ^ in1;
- tmp3 = tmp1 ^ in3;
- out6 = tmp2 ^ in5;
- out7 = tmp2 ^ in1;
- out2 = tmp3 ^ in4;
- out5 = out6 ^ in0 ^ in4;
- out4 = out5 ^ in3;
- out1 = tmp0 ^ tmp3;
- out3 = tmp3 ^ out5 ^ out7;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_CB(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in4 ^ in7;
- tmp1 = in5 ^ in7;
- out7 = in0 ^ in1 ^ in6;
- out5 = tmp0 ^ in6;
- out2 = tmp0 ^ in3;
- out6 = tmp1 ^ in0;
- out4 = tmp1 ^ in3 ^ in6;
- tmp2 = out5 ^ out7 ^ in2;
- out1 = tmp2 ^ out2;
- out0 = tmp2 ^ in4;
- out3 = tmp2 ^ in5;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_CC(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in3 ^ in5;
- tmp1 = in1 ^ in6;
- out1 = in2 ^ in3 ^ in7;
- out5 = tmp0 ^ in6;
- out0 = tmp1 ^ in2;
- tmp2 = out5 ^ in0 ^ in7;
- out3 = tmp2 ^ in4;
- out6 = tmp0 ^ out3;
- out7 = tmp1 ^ tmp2 ^ in3;
- tmp3 = out1 ^ out6;
- out4 = tmp2 ^ tmp3;
- out2 = tmp3 ^ in1;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_CD(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out5 = in3 ^ in6;
- tmp0 = in0 ^ in1;
- tmp1 = in2 ^ in7;
- out6 = in0 ^ in4 ^ in7;
- out2 = tmp0 ^ out5 ^ in4;
- out7 = tmp0 ^ in5;
- out0 = tmp0 ^ in2 ^ in6;
- out4 = tmp1 ^ in5;
- out1 = tmp1 ^ in1 ^ in3;
- out3 = out6 ^ in5 ^ in6;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_CE(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in2 ^ in5;
- tmp1 = tmp0 ^ in3;
- out4 = tmp1 ^ in4;
- tmp2 = out4 ^ in6;
- out3 = tmp2 ^ in0;
- out5 = tmp2 ^ in2;
- out2 = out3 ^ in5 ^ in7;
- out6 = tmp1 ^ out2;
- out7 = out2 ^ out4 ^ in1;
- out1 = tmp2 ^ out6;
- out0 = tmp0 ^ out7 ^ in0;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_CF(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in3 ^ in6;
- tmp1 = in0 ^ in1 ^ in5;
- out4 = in2 ^ in3 ^ in5;
- out5 = tmp0 ^ in4;
- out7 = tmp1 ^ in6;
- out1 = tmp1 ^ out4 ^ in7;
- tmp2 = out5 ^ in0;
- out2 = tmp2 ^ in7;
- out3 = tmp2 ^ out4;
- out6 = tmp0 ^ out2 ^ in5;
- out0 = tmp0 ^ out1;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_D0(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in0 ^ in3;
- tmp1 = in1 ^ in4;
- tmp2 = in2 ^ in5;
- out7 = tmp0 ^ tmp1;
- out0 = tmp1 ^ tmp2;
- tmp3 = tmp2 ^ in3;
- out1 = tmp3 ^ in6;
- tmp4 = out1 ^ in1;
- out2 = tmp4 ^ in7;
- out3 = out2 ^ in2;
- out4 = tmp0 ^ out3;
- out5 = tmp3 ^ out3;
- out6 = tmp4 ^ out4;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_D1(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in3 ^ in5 ^ in6;
- tmp1 = tmp0 ^ in1;
- out1 = tmp1 ^ in2;
- out2 = tmp1 ^ in7;
- out3 = out2 ^ in3;
- out5 = out3 ^ in2;
- tmp2 = out3 ^ in0;
- out4 = tmp2 ^ in4;
- out7 = tmp0 ^ out4;
- out6 = tmp2 ^ out1 ^ in6;
- out0 = out2 ^ out6 ^ in4;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_D2(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in5 ^ in6;
- out2 = tmp0 ^ in2 ^ in3;
- out1 = out2 ^ in0;
- out3 = out2 ^ in1;
- out4 = out1 ^ in1 ^ in2;
- out6 = out1 ^ in6 ^ in7;
- out7 = out4 ^ in4 ^ in5;
- out5 = out4 ^ out6 ^ in4;
- out0 = tmp0 ^ out5;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_D3(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out2 = in3 ^ in5 ^ in6;
- tmp0 = out2 ^ in2;
- tmp1 = tmp0 ^ in1;
- out1 = tmp1 ^ in0;
- out3 = tmp1 ^ in3;
- out4 = out1 ^ in2 ^ in4;
- tmp2 = out4 ^ in5;
- out7 = tmp2 ^ in7;
- out0 = tmp0 ^ out7;
- tmp3 = out0 ^ in0;
- out5 = tmp3 ^ in6;
- out6 = tmp2 ^ tmp3;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_D4(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out3 = in3 ^ in5;
- tmp0 = in1 ^ in5;
- tmp1 = tmp0 ^ in2;
- out4 = tmp1 ^ in0;
- tmp2 = tmp1 ^ in6;
- out2 = out4 ^ in3 ^ in7;
- out0 = tmp2 ^ in4;
- out5 = tmp2 ^ out3;
- out1 = tmp0 ^ out5 ^ in7;
- out6 = tmp0 ^ out2 ^ in4;
- out7 = tmp1 ^ out6 ^ in7;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_D5(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out3 = in5;
- tmp0 = in0 ^ in4;
- tmp1 = tmp0 ^ in1 ^ in5;
- out4 = tmp1 ^ in2;
- out0 = out4 ^ in6;
- tmp2 = tmp0 ^ out0;
- out5 = tmp2 ^ in3;
- out1 = out5 ^ in7;
- out6 = tmp1 ^ out1;
- out7 = tmp2 ^ out6;
- out2 = out7 ^ in4;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_D6(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in1 ^ in2 ^ in4 ^ in6;
- out5 = tmp0 ^ in3;
- out0 = tmp0 ^ in5 ^ in7;
- out3 = out0 ^ out5 ^ in2;
- tmp1 = out3 ^ in0;
- out1 = tmp1 ^ in6;
- out2 = tmp1 ^ in7;
- out4 = tmp1 ^ in1;
- out6 = tmp1 ^ in4;
- out7 = tmp0 ^ out2;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_D7(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in0 ^ in3;
- out3 = in2 ^ in5 ^ in7;
- out2 = tmp0 ^ in5;
- tmp1 = tmp0 ^ out3 ^ in1;
- out1 = tmp1 ^ in6;
- out4 = tmp1 ^ in4;
- tmp2 = out1 ^ in4;
- out6 = tmp2 ^ in1;
- out7 = tmp2 ^ in2;
- out0 = tmp2 ^ in3;
- out5 = tmp2 ^ in0 ^ in7;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_D8(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out4 = in0;
- out5 = in1;
- tmp0 = in1 ^ in2;
- out6 = in0 ^ in2;
- out0 = tmp0 ^ in4;
- tmp1 = tmp0 ^ in3;
- out7 = tmp1 ^ out6;
- out2 = tmp1 ^ in6;
- out3 = out7 ^ in7;
- out1 = tmp1 ^ in1 ^ in5;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_D9(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out4 = in0 ^ in4;
- out5 = in1 ^ in5;
- out2 = in1 ^ in3 ^ in6;
- out3 = in0 ^ in1 ^ in7;
- out6 = in0 ^ in2 ^ in6;
- out0 = out4 ^ in1 ^ in2;
- out1 = out5 ^ in2 ^ in3;
- out7 = out3 ^ in3;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_DA(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out5 = in1 ^ in4;
- tmp0 = in2 ^ in7;
- tmp1 = in0 ^ in2 ^ in3;
- out0 = tmp0 ^ out5;
- out4 = tmp0 ^ tmp1;
- out2 = tmp0 ^ in3 ^ in6;
- out1 = tmp1 ^ in5;
- out3 = tmp1 ^ in1;
- out6 = out1 ^ in3;
- out7 = out3 ^ in2 ^ in6;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_DB(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in0 ^ in1;
- tmp1 = in1 ^ in5;
- tmp2 = in3 ^ in7;
- out3 = tmp0 ^ in2;
- out5 = tmp1 ^ in4;
- out6 = tmp1 ^ out3 ^ in6;
- out2 = tmp2 ^ in6;
- tmp3 = tmp2 ^ in4;
- tmp4 = out3 ^ in3;
- out4 = tmp3 ^ in0;
- out1 = tmp4 ^ in5;
- out0 = tmp3 ^ tmp4;
- out7 = tmp0 ^ out2;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_DC(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in0 ^ in2;
- tmp1 = in0 ^ in3;
- out6 = tmp0 ^ in4;
- tmp2 = tmp0 ^ in7;
- out3 = tmp1 ^ in6;
- tmp3 = tmp1 ^ in1;
- out1 = tmp1 ^ tmp2 ^ in5;
- out4 = tmp2 ^ in6;
- out2 = tmp3 ^ in2;
- out7 = tmp3 ^ in5;
- out5 = tmp2 ^ out2;
- out0 = out2 ^ out3 ^ in4;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_DD(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out3 = in0 ^ in6;
- out2 = in0 ^ in1 ^ in3;
- out6 = out3 ^ in2 ^ in4;
- out7 = out2 ^ in5 ^ in7;
- out0 = out6 ^ in1;
- out4 = out6 ^ in7;
- out5 = out7 ^ in0;
- out1 = out5 ^ in2;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_DE(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in2 ^ in3 ^ in6;
- tmp1 = in3 ^ in4 ^ in7;
- out4 = tmp0 ^ in0;
- out5 = tmp1 ^ in1;
- out3 = out4 ^ in7;
- out2 = out3 ^ in6;
- out1 = out2 ^ in5;
- out6 = tmp1 ^ out1;
- out0 = tmp0 ^ out5;
- out7 = out0 ^ out1 ^ in4;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_DF(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out2 = in0 ^ in3 ^ in7;
- tmp0 = out2 ^ in1 ^ in5;
- out1 = tmp0 ^ in2;
- out7 = tmp0 ^ in6;
- out5 = tmp0 ^ in0 ^ in4;
- tmp1 = out1 ^ out5 ^ in6;
- out4 = tmp1 ^ in3;
- out6 = tmp1 ^ in5;
- tmp2 = tmp1 ^ in7;
- out0 = tmp2 ^ in1;
- out3 = tmp2 ^ in4;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_E0(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out3 = in1 ^ in7;
- tmp0 = in2 ^ in4;
- out4 = out3 ^ in3 ^ in5;
- out2 = tmp0 ^ in1;
- tmp1 = tmp0 ^ in6;
- out0 = out4 ^ in2;
- out6 = out4 ^ in0;
- out1 = tmp1 ^ in3;
- out5 = tmp1 ^ in0;
- out7 = out5 ^ in1;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_E1(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out2 = in1 ^ in4;
- tmp0 = in1 ^ in7;
- out3 = tmp0 ^ in3;
- tmp1 = out3 ^ in5;
- out4 = tmp1 ^ in4;
- tmp2 = tmp1 ^ in0;
- out0 = tmp2 ^ in2;
- out6 = tmp2 ^ in6;
- tmp3 = out0 ^ out4 ^ in6;
- out5 = tmp3 ^ in5;
- out7 = tmp0 ^ tmp3;
- out1 = tmp2 ^ out5 ^ in7;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_E2(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out3 = in1 ^ in2;
- out4 = in1 ^ in5;
- out2 = in2 ^ in4 ^ in7;
- out5 = in0 ^ in2 ^ in6;
- out0 = out3 ^ in3 ^ in5;
- out7 = out3 ^ in0 ^ in4;
- out6 = out2 ^ out7 ^ in3;
- out1 = out5 ^ in3 ^ in4;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_E3(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out2 = in4 ^ in7;
- tmp0 = in1 ^ in3;
- out3 = tmp0 ^ in2;
- tmp1 = out3 ^ in0;
- out0 = tmp1 ^ in5;
- tmp2 = tmp1 ^ in4;
- out1 = tmp2 ^ in6;
- tmp3 = tmp2 ^ in3;
- out7 = tmp3 ^ in7;
- out6 = out1 ^ out2 ^ in2;
- tmp4 = tmp0 ^ out0;
- out5 = tmp4 ^ in6;
- out4 = tmp3 ^ tmp4;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_E4(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out3 = in6;
- tmp0 = in0 ^ in4;
- tmp1 = tmp0 ^ in2 ^ in6;
- out2 = tmp1 ^ in1;
- out7 = out2 ^ in5;
- tmp2 = tmp0 ^ out7;
- out4 = tmp2 ^ in3;
- out0 = out4 ^ in7;
- out6 = tmp1 ^ out0;
- out5 = tmp2 ^ out6;
- out1 = out5 ^ in0;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_E5(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out3 = in3 ^ in6;
- tmp0 = in0 ^ in1;
- tmp1 = in5 ^ in7;
- out2 = tmp0 ^ in4 ^ in6;
- tmp2 = tmp1 ^ out2;
- out6 = tmp2 ^ in3;
- out7 = tmp2 ^ in2;
- out0 = out6 ^ in2 ^ in4;
- out5 = out6 ^ in1 ^ in2;
- out1 = tmp0 ^ out5 ^ in5;
- out4 = tmp1 ^ out1;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_E6(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out3 = in2 ^ in6 ^ in7;
- out2 = out3 ^ in0 ^ in4;
- out4 = out3 ^ in1 ^ in5;
- out1 = out2 ^ in3;
- out7 = out2 ^ out4 ^ in2;
- out0 = out4 ^ in3 ^ in7;
- out5 = out1 ^ in4;
- out6 = out0 ^ out2 ^ in5;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_E7(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in2 ^ in3;
- out3 = tmp0 ^ in6 ^ in7;
- tmp1 = out3 ^ in0;
- out5 = tmp1 ^ in5;
- tmp2 = tmp1 ^ in4;
- tmp3 = out5 ^ in7;
- out1 = tmp2 ^ in1;
- out0 = tmp3 ^ in1;
- out6 = out1 ^ in2;
- out2 = tmp0 ^ tmp2;
- tmp4 = tmp3 ^ out6;
- out4 = tmp4 ^ in6;
- out7 = tmp4 ^ in0;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_E8(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out4 = in3 ^ in6;
- tmp0 = in4 ^ in7;
- out1 = in2 ^ in3 ^ in4;
- out5 = tmp0 ^ in0;
- tmp1 = tmp0 ^ in1;
- tmp2 = tmp1 ^ in5;
- out0 = tmp1 ^ out1;
- out2 = tmp2 ^ in2;
- out6 = tmp2 ^ out5;
- tmp3 = out6 ^ in6;
- out3 = tmp3 ^ in7;
- out7 = tmp3 ^ in2 ^ in5;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_E9(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in0 ^ in1;
- tmp1 = in3 ^ in6;
- tmp2 = tmp0 ^ in6;
- out4 = tmp1 ^ in4;
- out6 = tmp2 ^ in5;
- out7 = tmp2 ^ in2 ^ in7;
- out3 = out6 ^ in3 ^ in7;
- out0 = tmp1 ^ out7;
- out2 = out3 ^ out4 ^ in0;
- out5 = tmp0 ^ out2;
- out1 = out0 ^ out5 ^ in5;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_EA(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out4 = in6 ^ in7;
- out5 = in0 ^ in7;
- out6 = in0 ^ in1;
- out0 = in1 ^ in2 ^ in3;
- out2 = in2 ^ in4 ^ in5;
- out7 = out6 ^ in2;
- out1 = out0 ^ out6 ^ in4;
- out3 = out7 ^ in5 ^ in6;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_EB(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out2 = in4 ^ in5;
- tmp0 = in0 ^ in1;
- out4 = in4 ^ in6 ^ in7;
- out5 = in0 ^ in5 ^ in7;
- out6 = tmp0 ^ in6;
- tmp1 = tmp0 ^ in2;
- out0 = tmp1 ^ in3;
- out7 = tmp1 ^ in7;
- out1 = out0 ^ in4;
- out3 = out0 ^ in5 ^ in6;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_EC(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out3 = in0 ^ in5;
- out4 = in2 ^ in3 ^ in7;
- out5 = in0 ^ in3 ^ in4;
- out6 = out3 ^ in1 ^ in4;
- out1 = out4 ^ in4;
- out0 = out4 ^ in1 ^ in6;
- out2 = out0 ^ out5 ^ in5;
- out7 = out2 ^ in4 ^ in7;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_ED(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in2 ^ in4;
- tmp1 = in3 ^ in5;
- out4 = tmp0 ^ in3 ^ in7;
- out3 = tmp1 ^ in0;
- out1 = out4 ^ in1;
- out5 = out3 ^ in4;
- out7 = out1 ^ out5 ^ in6;
- out2 = tmp0 ^ out7;
- out0 = tmp1 ^ out7;
- out6 = out2 ^ in7;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_EE(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out4 = in2;
- tmp0 = in0 ^ in1;
- out5 = in0 ^ in3;
- tmp1 = tmp0 ^ in2;
- out6 = tmp0 ^ in4;
- tmp2 = tmp1 ^ out5;
- out7 = tmp1 ^ in5;
- out1 = tmp2 ^ out6 ^ in7;
- out0 = tmp2 ^ in6;
- tmp3 = out7 ^ in1;
- out3 = tmp3 ^ in7;
- out2 = tmp3 ^ in4 ^ in6;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_EF(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out4 = in2 ^ in4;
- tmp0 = in0 ^ in5;
- tmp1 = in4 ^ in6;
- out5 = tmp0 ^ in3;
- out2 = tmp0 ^ tmp1;
- out6 = tmp1 ^ in0 ^ in1;
- out3 = out5 ^ in2 ^ in7;
- out7 = out3 ^ in1 ^ in3;
- out0 = out4 ^ out6 ^ in3;
- out1 = tmp1 ^ out0 ^ in7;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_F0(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in1 ^ in2;
- tmp1 = in4 ^ in5;
- out2 = tmp0 ^ in6;
- out3 = tmp1 ^ in1;
- tmp2 = tmp1 ^ in7;
- out1 = out2 ^ out3 ^ in3;
- tmp3 = tmp0 ^ tmp2;
- out0 = tmp3 ^ in3;
- out5 = tmp3 ^ in0;
- out4 = out1 ^ out5 ^ in4;
- out7 = out4 ^ in2;
- out6 = tmp2 ^ out7;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_F1(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out2 = in1 ^ in6;
- tmp0 = in3 ^ in5;
- out3 = tmp0 ^ in1 ^ in4;
- tmp1 = out3 ^ in2;
- out1 = tmp1 ^ in6;
- tmp2 = tmp1 ^ in0;
- tmp3 = out1 ^ in5;
- out0 = tmp2 ^ in7;
- out6 = tmp2 ^ in4;
- out7 = tmp3 ^ in0;
- out5 = tmp0 ^ out0;
- out4 = tmp3 ^ out5 ^ in1;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_F2(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in4 ^ in5;
- out2 = in2 ^ in6 ^ in7;
- tmp1 = tmp0 ^ in1;
- tmp2 = tmp1 ^ in2;
- out0 = tmp2 ^ in3;
- out3 = tmp2 ^ in7;
- out5 = out3 ^ in0 ^ in4;
- tmp3 = tmp0 ^ out5;
- out7 = tmp3 ^ in3;
- out4 = tmp3 ^ out2;
- out1 = out0 ^ out4 ^ in4;
- out6 = tmp1 ^ out1;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_F3(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out2 = in6 ^ in7;
- tmp0 = in0 ^ in1;
- out4 = tmp0 ^ in6;
- tmp1 = tmp0 ^ in2;
- out5 = tmp1 ^ in7;
- out6 = tmp1 ^ in3;
- out7 = out6 ^ in4;
- out0 = out7 ^ in5;
- out1 = out0 ^ in6;
- out3 = out0 ^ in0 ^ in7;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_F4(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out2 = in0 ^ in1 ^ in2;
- tmp0 = out2 ^ in3;
- out4 = tmp0 ^ in4;
- out5 = out4 ^ in5;
- out6 = out5 ^ in6;
- out7 = out6 ^ in7;
- out0 = out7 ^ in0;
- out1 = out0 ^ in1;
- out3 = tmp0 ^ out7;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_F5(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out2 = in0 ^ in1;
- tmp0 = out2 ^ in2;
- out4 = tmp0 ^ in3;
- out5 = out4 ^ in4;
- out6 = out5 ^ in5;
- out7 = out6 ^ in6;
- out0 = out7 ^ in7;
- out1 = out0 ^ in0;
- out3 = tmp0 ^ out0;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_F6(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in0 ^ in7;
- out2 = tmp0 ^ in2;
- out4 = out2 ^ in1 ^ in4;
- out7 = out4 ^ in3 ^ in5;
- out5 = out7 ^ in4 ^ in7;
- out0 = tmp0 ^ out7 ^ in6;
- tmp1 = out0 ^ in1;
- out6 = out0 ^ in0 ^ in5;
- out3 = tmp1 ^ in3;
- out1 = tmp0 ^ tmp1;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_F7(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out2 = in0 ^ in7;
- tmp0 = out2 ^ in1;
- out4 = tmp0 ^ in2;
- out5 = out4 ^ in3 ^ in7;
- out6 = out5 ^ in4;
- out7 = out6 ^ in5;
- out0 = out7 ^ in6;
- out1 = out0 ^ in7;
- out3 = tmp0 ^ out1;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_F8(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in0 ^ in4;
- tmp1 = in3 ^ in5;
- tmp2 = tmp0 ^ in6;
- out4 = tmp0 ^ tmp1;
- out1 = tmp1 ^ in2 ^ in4;
- out3 = tmp2 ^ in1;
- out5 = out3 ^ in5;
- out7 = out1 ^ out5 ^ in7;
- out6 = tmp1 ^ out7;
- out0 = tmp2 ^ out7;
- out2 = out6 ^ in0;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_F9(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in3 ^ in5;
- tmp1 = in0 ^ in6;
- out4 = tmp0 ^ in0;
- tmp2 = tmp1 ^ in4;
- tmp3 = tmp1 ^ in2;
- out5 = tmp2 ^ in1;
- out3 = out5 ^ in3;
- tmp4 = tmp3 ^ out3;
- out1 = tmp4 ^ in5;
- out0 = tmp4 ^ in0 ^ in7;
- out6 = tmp0 ^ out0 ^ in4;
- out7 = tmp2 ^ tmp4;
- out2 = tmp3 ^ out6;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_FA(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in0 ^ in1;
- tmp1 = tmp0 ^ in2;
- tmp2 = tmp0 ^ in5;
- tmp3 = tmp1 ^ in7;
- out5 = tmp2 ^ in6;
- out6 = tmp3 ^ in6;
- out7 = tmp3 ^ in3;
- out3 = out6 ^ in4;
- out2 = tmp1 ^ out5;
- out4 = out2 ^ out3 ^ in1;
- out0 = out4 ^ out7 ^ in5;
- out1 = tmp2 ^ out0;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_FB(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out2 = in5 ^ in6;
- tmp0 = in0 ^ in1;
- out4 = in0 ^ in5 ^ in7;
- out5 = tmp0 ^ in6;
- tmp1 = tmp0 ^ in2;
- out6 = tmp1 ^ in7;
- out7 = tmp1 ^ in3;
- out0 = out7 ^ in4;
- out1 = out0 ^ in5;
- out3 = out0 ^ in6 ^ in7;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_FC(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in1 ^ in2;
- tmp1 = in0 ^ in7;
- out2 = tmp0 ^ tmp1 ^ in5;
- out3 = tmp1 ^ in4;
- tmp2 = out2 ^ in6;
- out6 = tmp2 ^ in4;
- out7 = tmp2 ^ in3;
- out4 = out6 ^ in1 ^ in3;
- tmp3 = out4 ^ in0;
- out1 = tmp3 ^ in6;
- out0 = tmp3 ^ in1 ^ in5;
- out5 = tmp0 ^ out4;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_FD(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in0 ^ in5;
- tmp1 = in1 ^ in7;
- out2 = tmp0 ^ tmp1;
- out6 = out2 ^ in2 ^ in4;
- tmp2 = out6 ^ in0;
- out1 = tmp2 ^ in3;
- out0 = tmp0 ^ out1 ^ in6;
- out5 = out0 ^ in2;
- tmp3 = out5 ^ in1;
- out3 = tmp3 ^ in6;
- out7 = tmp2 ^ tmp3;
- out4 = tmp1 ^ out7;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_FE(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- tmp0 = in0 ^ in2;
- out2 = tmp0 ^ in5;
- out3 = tmp0 ^ in4;
- tmp1 = out3 ^ in6;
- out4 = tmp1 ^ in5;
- tmp2 = tmp1 ^ in1;
- out6 = tmp2 ^ in7;
- tmp3 = tmp2 ^ in0;
- out0 = tmp3 ^ in3;
- tmp4 = out0 ^ out4 ^ in7;
- out5 = tmp4 ^ in6;
- out7 = tmp4 ^ in2;
- out1 = tmp3 ^ out5;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-static void gf8_muladd_FF(uint8_t * out, uint8_t * in, unsigned int width)
-{
- unsigned int i;
- uint64_t * in_ptr = (uint64_t *)in;
- uint64_t * out_ptr = (uint64_t *)out;
-
- for (i = 0; i < width; i++)
- {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- uint64_t tmp0, tmp1, tmp2, tmp3;
-
- uint64_t in0 = out_ptr[0];
- uint64_t in1 = out_ptr[width];
- uint64_t in2 = out_ptr[width * 2];
- uint64_t in3 = out_ptr[width * 3];
- uint64_t in4 = out_ptr[width * 4];
- uint64_t in5 = out_ptr[width * 5];
- uint64_t in6 = out_ptr[width * 6];
- uint64_t in7 = out_ptr[width * 7];
-
- out2 = in0 ^ in5;
- tmp0 = in4 ^ in7;
- tmp1 = out2 ^ in2;
- out4 = tmp1 ^ in6;
- out7 = tmp1 ^ in1 ^ in3;
- out1 = tmp0 ^ out7;
- tmp2 = out1 ^ in5;
- out6 = tmp2 ^ in3;
- tmp3 = tmp2 ^ in7;
- out0 = tmp3 ^ in6;
- out3 = tmp3 ^ in1;
- out5 = tmp0 ^ out0 ^ in2;
-
- out_ptr[0] = out0 ^ in_ptr[0];
- out_ptr[width] = out1 ^ in_ptr[width];
- out_ptr[width * 2] = out2 ^ in_ptr[width * 2];
- out_ptr[width * 3] = out3 ^ in_ptr[width * 3];
- out_ptr[width * 4] = out4 ^ in_ptr[width * 4];
- out_ptr[width * 5] = out5 ^ in_ptr[width * 5];
- out_ptr[width * 6] = out6 ^ in_ptr[width * 6];
- out_ptr[width * 7] = out7 ^ in_ptr[width * 7];
-
- in_ptr++;
- out_ptr++;
- }
-}
-
-void (* ec_gf_muladd[])(uint8_t * out, uint8_t * in, unsigned int width) =
-{
- gf8_muladd_00, gf8_muladd_01, gf8_muladd_02, gf8_muladd_03,
- gf8_muladd_04, gf8_muladd_05, gf8_muladd_06, gf8_muladd_07,
- gf8_muladd_08, gf8_muladd_09, gf8_muladd_0A, gf8_muladd_0B,
- gf8_muladd_0C, gf8_muladd_0D, gf8_muladd_0E, gf8_muladd_0F,
- gf8_muladd_10, gf8_muladd_11, gf8_muladd_12, gf8_muladd_13,
- gf8_muladd_14, gf8_muladd_15, gf8_muladd_16, gf8_muladd_17,
- gf8_muladd_18, gf8_muladd_19, gf8_muladd_1A, gf8_muladd_1B,
- gf8_muladd_1C, gf8_muladd_1D, gf8_muladd_1E, gf8_muladd_1F,
- gf8_muladd_20, gf8_muladd_21, gf8_muladd_22, gf8_muladd_23,
- gf8_muladd_24, gf8_muladd_25, gf8_muladd_26, gf8_muladd_27,
- gf8_muladd_28, gf8_muladd_29, gf8_muladd_2A, gf8_muladd_2B,
- gf8_muladd_2C, gf8_muladd_2D, gf8_muladd_2E, gf8_muladd_2F,
- gf8_muladd_30, gf8_muladd_31, gf8_muladd_32, gf8_muladd_33,
- gf8_muladd_34, gf8_muladd_35, gf8_muladd_36, gf8_muladd_37,
- gf8_muladd_38, gf8_muladd_39, gf8_muladd_3A, gf8_muladd_3B,
- gf8_muladd_3C, gf8_muladd_3D, gf8_muladd_3E, gf8_muladd_3F,
- gf8_muladd_40, gf8_muladd_41, gf8_muladd_42, gf8_muladd_43,
- gf8_muladd_44, gf8_muladd_45, gf8_muladd_46, gf8_muladd_47,
- gf8_muladd_48, gf8_muladd_49, gf8_muladd_4A, gf8_muladd_4B,
- gf8_muladd_4C, gf8_muladd_4D, gf8_muladd_4E, gf8_muladd_4F,
- gf8_muladd_50, gf8_muladd_51, gf8_muladd_52, gf8_muladd_53,
- gf8_muladd_54, gf8_muladd_55, gf8_muladd_56, gf8_muladd_57,
- gf8_muladd_58, gf8_muladd_59, gf8_muladd_5A, gf8_muladd_5B,
- gf8_muladd_5C, gf8_muladd_5D, gf8_muladd_5E, gf8_muladd_5F,
- gf8_muladd_60, gf8_muladd_61, gf8_muladd_62, gf8_muladd_63,
- gf8_muladd_64, gf8_muladd_65, gf8_muladd_66, gf8_muladd_67,
- gf8_muladd_68, gf8_muladd_69, gf8_muladd_6A, gf8_muladd_6B,
- gf8_muladd_6C, gf8_muladd_6D, gf8_muladd_6E, gf8_muladd_6F,
- gf8_muladd_70, gf8_muladd_71, gf8_muladd_72, gf8_muladd_73,
- gf8_muladd_74, gf8_muladd_75, gf8_muladd_76, gf8_muladd_77,
- gf8_muladd_78, gf8_muladd_79, gf8_muladd_7A, gf8_muladd_7B,
- gf8_muladd_7C, gf8_muladd_7D, gf8_muladd_7E, gf8_muladd_7F,
- gf8_muladd_80, gf8_muladd_81, gf8_muladd_82, gf8_muladd_83,
- gf8_muladd_84, gf8_muladd_85, gf8_muladd_86, gf8_muladd_87,
- gf8_muladd_88, gf8_muladd_89, gf8_muladd_8A, gf8_muladd_8B,
- gf8_muladd_8C, gf8_muladd_8D, gf8_muladd_8E, gf8_muladd_8F,
- gf8_muladd_90, gf8_muladd_91, gf8_muladd_92, gf8_muladd_93,
- gf8_muladd_94, gf8_muladd_95, gf8_muladd_96, gf8_muladd_97,
- gf8_muladd_98, gf8_muladd_99, gf8_muladd_9A, gf8_muladd_9B,
- gf8_muladd_9C, gf8_muladd_9D, gf8_muladd_9E, gf8_muladd_9F,
- gf8_muladd_A0, gf8_muladd_A1, gf8_muladd_A2, gf8_muladd_A3,
- gf8_muladd_A4, gf8_muladd_A5, gf8_muladd_A6, gf8_muladd_A7,
- gf8_muladd_A8, gf8_muladd_A9, gf8_muladd_AA, gf8_muladd_AB,
- gf8_muladd_AC, gf8_muladd_AD, gf8_muladd_AE, gf8_muladd_AF,
- gf8_muladd_B0, gf8_muladd_B1, gf8_muladd_B2, gf8_muladd_B3,
- gf8_muladd_B4, gf8_muladd_B5, gf8_muladd_B6, gf8_muladd_B7,
- gf8_muladd_B8, gf8_muladd_B9, gf8_muladd_BA, gf8_muladd_BB,
- gf8_muladd_BC, gf8_muladd_BD, gf8_muladd_BE, gf8_muladd_BF,
- gf8_muladd_C0, gf8_muladd_C1, gf8_muladd_C2, gf8_muladd_C3,
- gf8_muladd_C4, gf8_muladd_C5, gf8_muladd_C6, gf8_muladd_C7,
- gf8_muladd_C8, gf8_muladd_C9, gf8_muladd_CA, gf8_muladd_CB,
- gf8_muladd_CC, gf8_muladd_CD, gf8_muladd_CE, gf8_muladd_CF,
- gf8_muladd_D0, gf8_muladd_D1, gf8_muladd_D2, gf8_muladd_D3,
- gf8_muladd_D4, gf8_muladd_D5, gf8_muladd_D6, gf8_muladd_D7,
- gf8_muladd_D8, gf8_muladd_D9, gf8_muladd_DA, gf8_muladd_DB,
- gf8_muladd_DC, gf8_muladd_DD, gf8_muladd_DE, gf8_muladd_DF,
- gf8_muladd_E0, gf8_muladd_E1, gf8_muladd_E2, gf8_muladd_E3,
- gf8_muladd_E4, gf8_muladd_E5, gf8_muladd_E6, gf8_muladd_E7,
- gf8_muladd_E8, gf8_muladd_E9, gf8_muladd_EA, gf8_muladd_EB,
- gf8_muladd_EC, gf8_muladd_ED, gf8_muladd_EE, gf8_muladd_EF,
- gf8_muladd_F0, gf8_muladd_F1, gf8_muladd_F2, gf8_muladd_F3,
- gf8_muladd_F4, gf8_muladd_F5, gf8_muladd_F6, gf8_muladd_F7,
- gf8_muladd_F8, gf8_muladd_F9, gf8_muladd_FA, gf8_muladd_FB,
- gf8_muladd_FC, gf8_muladd_FD, gf8_muladd_FE, gf8_muladd_FF
-};
diff --git a/xlators/cluster/ec/src/ec-gf8.c b/xlators/cluster/ec/src/ec-gf8.c
new file mode 100644
index 00000000000..039adae5929
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-gf8.c
@@ -0,0 +1,5882 @@
+/*
+ Copyright (c) 2015 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "ec-gf8.h"
+
+static ec_gf_op_t ec_gf8_mul_00_ops[] = {{EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_00 = {0,
+ {
+ 0,
+ },
+ ec_gf8_mul_00_ops};
+
+static ec_gf_op_t ec_gf8_mul_01_ops[] = {{EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_01 = {8,
+ {
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ },
+ ec_gf8_mul_01_ops};
+
+static ec_gf_op_t ec_gf8_mul_02_ops[] = {{EC_GF_OP_XOR2, 1, 7, 0},
+ {EC_GF_OP_XOR2, 2, 7, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_02 = {8,
+ {
+ 7,
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ },
+ ec_gf8_mul_02_ops};
+
+static ec_gf_op_t ec_gf8_mul_03_ops[] = {
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_COPY, 8, 3, 0},
+ {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 2, 1, 0},
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 2, 7, 0},
+ {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_XOR2, 7, 6, 0},
+ {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 5, 4, 0},
+ {EC_GF_OP_XOR2, 4, 8, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_03 = {9,
+ {
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 8,
+ },
+ ec_gf8_mul_03_ops};
+
+static ec_gf_op_t ec_gf8_mul_04_ops[] = {
+ {EC_GF_OP_XOR3, 8, 6, 7}, {EC_GF_OP_XOR2, 2, 8, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 1, 8, 0},
+ {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_04 = {9,
+ {
+ 6,
+ 7,
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 8,
+ },
+ ec_gf8_mul_04_ops};
+
+static ec_gf_op_t ec_gf8_mul_05_ops[] = {
+ {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 1, 7, 0},
+ {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 0, 6, 0},
+ {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_05 = {8,
+ {
+ 0,
+ 1,
+ 2,
+ 6,
+ 7,
+ 3,
+ 4,
+ 5,
+ },
+ ec_gf8_mul_05_ops};
+
+static ec_gf_op_t ec_gf8_mul_06_ops[] = {
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_COPY, 8, 2, 0},
+ {EC_GF_OP_XOR2, 8, 3, 0}, {EC_GF_OP_XOR2, 2, 1, 0},
+ {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 1, 0, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 4, 5, 0},
+ {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 5, 6, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_06 = {9,
+ {
+ 7,
+ 0,
+ 1,
+ 2,
+ 8,
+ 3,
+ 4,
+ 5,
+ 6,
+ },
+ ec_gf8_mul_06_ops};
+
+static ec_gf_op_t ec_gf8_mul_07_ops[] = {
+ {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 7, 5, 0},
+ {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 2, 4, 0},
+ {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 4, 7, 0},
+ {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_07 = {8,
+ {
+ 6,
+ 0,
+ 1,
+ 3,
+ 2,
+ 4,
+ 5,
+ 7,
+ },
+ ec_gf8_mul_07_ops};
+
+static ec_gf_op_t ec_gf8_mul_08_ops[] = {
+ {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR3, 8, 6, 7},
+ {EC_GF_OP_XOR2, 1, 8, 0}, {EC_GF_OP_XOR2, 3, 7, 0},
+ {EC_GF_OP_XOR2, 7, 5, 0}, {EC_GF_OP_XOR2, 2, 8, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_08 = {9,
+ {
+ 5,
+ 6,
+ 7,
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 8,
+ },
+ ec_gf8_mul_08_ops};
+
+static ec_gf_op_t ec_gf8_mul_09_ops[] = {
+ {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 4, 7, 0},
+ {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_XOR2, 5, 4, 0},
+ {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 2, 5, 0},
+ {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_XOR2, 7, 3, 0},
+ {EC_GF_OP_XOR2, 6, 2, 0}, {EC_GF_OP_XOR2, 5, 1, 0},
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_09 = {8,
+ {
+ 0,
+ 1,
+ 2,
+ 3,
+ 5,
+ 6,
+ 7,
+ 4,
+ },
+ ec_gf8_mul_09_ops};
+
+static ec_gf_op_t ec_gf8_mul_0A_ops[] = {
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 4, 6, 0},
+ {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 7, 4, 0},
+ {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 7, 2, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 2, 5, 0},
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_0A = {8,
+ {
+ 5,
+ 0,
+ 1,
+ 2,
+ 6,
+ 7,
+ 3,
+ 4,
+ },
+ ec_gf8_mul_0A_ops};
+
+static ec_gf_op_t ec_gf8_mul_0B_ops[] = {
+ {EC_GF_OP_XOR2, 4, 7, 0}, {EC_GF_OP_XOR2, 7, 0, 0},
+ {EC_GF_OP_XOR2, 7, 5, 0}, {EC_GF_OP_XOR2, 3, 7, 0},
+ {EC_GF_OP_COPY, 9, 3, 0}, {EC_GF_OP_XOR2, 5, 2, 0},
+ {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_COPY, 8, 5, 0},
+ {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 5, 1, 0},
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 1, 0, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 4, 1, 0},
+ {EC_GF_OP_XOR3, 3, 8, 6}, {EC_GF_OP_XOR2, 1, 9, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_0B = {10,
+ {
+ 7,
+ 1,
+ 5,
+ 2,
+ 4,
+ 3,
+ 0,
+ 6,
+ 8,
+ 9,
+ },
+ ec_gf8_mul_0B_ops};
+
+static ec_gf_op_t ec_gf8_mul_0C_ops[] = {
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_COPY, 8, 1, 0},
+ {EC_GF_OP_XOR2, 8, 2, 0}, {EC_GF_OP_XOR2, 2, 3, 0},
+ {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 3, 7, 0},
+ {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 1, 0, 0},
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 7, 6, 0},
+ {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_0C = {9,
+ {
+ 5,
+ 7,
+ 0,
+ 1,
+ 8,
+ 2,
+ 3,
+ 4,
+ 6,
+ },
+ ec_gf8_mul_0C_ops};
+
+static ec_gf_op_t ec_gf8_mul_0D_ops[] = {
+ {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 4, 5, 0},
+ {EC_GF_OP_XOR2, 5, 0, 0}, {EC_GF_OP_XOR2, 5, 1, 0},
+ {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 0, 3, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 3, 5, 0},
+ {EC_GF_OP_XOR3, 8, 2, 4}, {EC_GF_OP_XOR2, 5, 6, 0},
+ {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 1, 8, 0},
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 7, 2, 0},
+ {EC_GF_OP_XOR3, 2, 8, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_0D = {9,
+ {
+ 5,
+ 6,
+ 7,
+ 3,
+ 1,
+ 0,
+ 2,
+ 4,
+ 8,
+ },
+ ec_gf8_mul_0D_ops};
+
+static ec_gf_op_t ec_gf8_mul_0E_ops[] = {
+ {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_XOR2, 5, 6, 0},
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 7, 5, 0},
+ {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 1, 2, 0},
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 2, 4, 0},
+ {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 4, 7, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_0E = {8,
+ {
+ 7,
+ 0,
+ 6,
+ 1,
+ 3,
+ 2,
+ 4,
+ 5,
+ },
+ ec_gf8_mul_0E_ops};
+
+static ec_gf_op_t ec_gf8_mul_0F_ops[] = {
+ {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_XOR2, 7, 1, 0},
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 4, 0, 0},
+ {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 4, 1, 0},
+ {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 6, 5, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 7, 2, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_0F = {8,
+ {
+ 1,
+ 0,
+ 5,
+ 6,
+ 7,
+ 2,
+ 3,
+ 4,
+ },
+ ec_gf8_mul_0F_ops};
+
+static ec_gf_op_t ec_gf8_mul_10_ops[] = {
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 7, 6, 0},
+ {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_XOR2, 7, 5, 0},
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 0, 6, 0},
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_10 = {8,
+ {
+ 4,
+ 5,
+ 6,
+ 7,
+ 0,
+ 1,
+ 2,
+ 3,
+ },
+ ec_gf8_mul_10_ops};
+
+static ec_gf_op_t ec_gf8_mul_11_ops[] = {
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 5, 6, 0},
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 4, 0, 0},
+ {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_XOR2, 5, 7, 0},
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 2, 6, 0},
+ {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 6, 5, 0},
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_11 = {8,
+ {
+ 4,
+ 1,
+ 2,
+ 6,
+ 0,
+ 5,
+ 7,
+ 3,
+ },
+ ec_gf8_mul_11_ops};
+
+static ec_gf_op_t ec_gf8_mul_12_ops[] = {
+ {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 6, 7, 0},
+ {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 3, 6, 0},
+ {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 1, 6, 0},
+ {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_XOR2, 6, 2, 0}, {EC_GF_OP_XOR2, 5, 1, 0},
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_12 = {8,
+ {
+ 7,
+ 0,
+ 1,
+ 2,
+ 3,
+ 5,
+ 6,
+ 4,
+ },
+ ec_gf8_mul_12_ops};
+
+static ec_gf_op_t ec_gf8_mul_13_ops[] = {
+ {EC_GF_OP_XOR2, 4, 7, 0}, {EC_GF_OP_XOR2, 7, 5, 0},
+ {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 5, 1, 0},
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 7, 2, 0},
+ {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_XOR3, 8, 3, 7},
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 6, 8, 0},
+ {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 0, 8, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_13 = {9,
+ {
+ 4,
+ 5,
+ 2,
+ 6,
+ 0,
+ 1,
+ 7,
+ 3,
+ 8,
+ },
+ ec_gf8_mul_13_ops};
+
+static ec_gf_op_t ec_gf8_mul_14_ops[] = {
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 7, 5, 0},
+ {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 2, 7, 0},
+ {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 3, 7, 0},
+ {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_14 = {8,
+ {
+ 6,
+ 7,
+ 0,
+ 1,
+ 2,
+ 4,
+ 5,
+ 3,
+ },
+ ec_gf8_mul_14_ops};
+
+static ec_gf_op_t ec_gf8_mul_15_ops[] = {
+ {EC_GF_OP_COPY, 8, 0, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 0, 6, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 1, 7, 0},
+ {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 7, 2, 0},
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR3, 5, 8, 7},
+ {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 4, 6, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_15 = {9,
+ {
+ 0,
+ 1,
+ 2,
+ 4,
+ 7,
+ 6,
+ 5,
+ 3,
+ 8,
+ },
+ ec_gf8_mul_15_ops};
+
+static ec_gf_op_t ec_gf8_mul_16_ops[] = {
+ {EC_GF_OP_XOR2, 4, 7, 0}, {EC_GF_OP_XOR2, 3, 6, 0},
+ {EC_GF_OP_XOR2, 7, 5, 0}, {EC_GF_OP_XOR2, 6, 4, 0},
+ {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 2, 6, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 4, 1, 0},
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 4, 0, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 0, 3, 0},
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 3, 4, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_16 = {8,
+ {
+ 6,
+ 7,
+ 4,
+ 1,
+ 2,
+ 3,
+ 5,
+ 0,
+ },
+ ec_gf8_mul_16_ops};
+
+static ec_gf_op_t ec_gf8_mul_17_ops[] = {
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 7, 5, 0},
+ {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 2, 7, 0},
+ {EC_GF_OP_XOR2, 4, 2, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 7, 0, 0},
+ {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_XOR2, 5, 6, 0},
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_17 = {8,
+ {
+ 5,
+ 7,
+ 0,
+ 1,
+ 3,
+ 2,
+ 4,
+ 6,
+ },
+ ec_gf8_mul_17_ops};
+
+static ec_gf_op_t ec_gf8_mul_18_ops[] = {
+ {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_COPY, 8, 0, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 4, 5, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 5, 6, 0},
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 7, 5, 0},
+ {EC_GF_OP_XOR2, 6, 8, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_18 = {9,
+ {
+ 4,
+ 5,
+ 7,
+ 6,
+ 0,
+ 1,
+ 2,
+ 3,
+ 8,
+ },
+ ec_gf8_mul_18_ops};
+
+static ec_gf_op_t ec_gf8_mul_19_ops[] = {
+ {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 7, 1, 0},
+ {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 3, 2, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 6, 7, 0},
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 2, 6, 0},
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_19 = {8,
+ {
+ 0,
+ 5,
+ 2,
+ 6,
+ 7,
+ 1,
+ 3,
+ 4,
+ },
+ ec_gf8_mul_19_ops};
+
+static ec_gf_op_t ec_gf8_mul_1A_ops[] = {
+ {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 5, 4, 0},
+ {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 7, 5, 0},
+ {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 4, 1, 0},
+ {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 3, 4, 0},
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 4, 0, 0},
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_1A = {8,
+ {
+ 7,
+ 0,
+ 4,
+ 5,
+ 3,
+ 1,
+ 2,
+ 6,
+ },
+ ec_gf8_mul_1A_ops};
+
+static ec_gf_op_t ec_gf8_mul_1B_ops[] = {
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 0, 2, 0},
+ {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 7, 2, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 4, 0, 0},
+ {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 1, 4, 0},
+ {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 0, 6, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_1B = {8,
+ {
+ 7,
+ 4,
+ 5,
+ 6,
+ 3,
+ 1,
+ 2,
+ 0,
+ },
+ ec_gf8_mul_1B_ops};
+
+static ec_gf_op_t ec_gf8_mul_1C_ops[] = {
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 5, 6, 0},
+ {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 3, 0, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 2, 6, 0},
+ {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 7, 5, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 4, 7, 0},
+ {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 6, 4, 0},
+ {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 3, 6, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_1C = {8,
+ {
+ 5,
+ 4,
+ 3,
+ 0,
+ 1,
+ 7,
+ 2,
+ 6,
+ },
+ ec_gf8_mul_1C_ops};
+
+static ec_gf_op_t ec_gf8_mul_1D_ops[] = {
+ {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 1, 0, 0},
+ {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR3, 8, 4, 2},
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 6, 5, 0},
+ {EC_GF_OP_XOR2, 5, 8, 0}, {EC_GF_OP_XOR2, 7, 6, 0},
+ {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_1D = {9,
+ {
+ 0,
+ 7,
+ 5,
+ 8,
+ 2,
+ 3,
+ 4,
+ 1,
+ 6,
+ },
+ ec_gf8_mul_1D_ops};
+
+static ec_gf_op_t ec_gf8_mul_1E_ops[] = {
+ {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 1, 4, 0},
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 2, 7, 0},
+ {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 1, 2, 0},
+ {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 4, 7, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 3, 4, 0},
+ {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 2, 3, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_1E = {8,
+ {
+ 4,
+ 7,
+ 5,
+ 1,
+ 6,
+ 0,
+ 2,
+ 3,
+ },
+ ec_gf8_mul_1E_ops};
+
+static ec_gf_op_t ec_gf8_mul_1F_ops[] = {
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 5, 4, 0},
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR2, 7, 5, 0}, {EC_GF_OP_XOR2, 2, 6, 0},
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR3, 8, 3, 7},
+ {EC_GF_OP_XOR2, 4, 8, 0}, {EC_GF_OP_XOR2, 1, 8, 0},
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 0, 6, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_1F = {9,
+ {
+ 1,
+ 4,
+ 5,
+ 6,
+ 7,
+ 0,
+ 3,
+ 2,
+ 8,
+ },
+ ec_gf8_mul_1F_ops};
+
+static ec_gf_op_t ec_gf8_mul_20_ops[] = {
+ {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 1, 6, 0},
+ {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 2, 6, 0},
+ {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 3, 5, 0},
+ {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_20 = {8,
+ {
+ 7,
+ 4,
+ 5,
+ 6,
+ 3,
+ 0,
+ 1,
+ 2,
+ },
+ ec_gf8_mul_20_ops};
+
+static ec_gf_op_t ec_gf8_mul_21_ops[] = {
+ {EC_GF_OP_COPY, 9, 0, 0}, {EC_GF_OP_XOR2, 0, 3, 0},
+ {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 3, 1, 0},
+ {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 4, 6, 0},
+ {EC_GF_OP_XOR3, 8, 7, 5}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 6, 2, 0}, {EC_GF_OP_XOR2, 7, 4, 0},
+ {EC_GF_OP_XOR2, 3, 8, 0}, {EC_GF_OP_XOR2, 2, 8, 0},
+ {EC_GF_OP_XOR2, 4, 9, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_21 = {10,
+ {
+ 0,
+ 1,
+ 2,
+ 7,
+ 5,
+ 4,
+ 3,
+ 6,
+ 8,
+ 9,
+ },
+ ec_gf8_mul_21_ops};
+
+static ec_gf_op_t ec_gf8_mul_22_ops[] = {
+ {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 4, 6, 0},
+ {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 7, 2, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 2, 4, 0},
+ {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 1, 6, 0},
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 4, 0, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_22 = {8,
+ {
+ 3,
+ 0,
+ 5,
+ 2,
+ 6,
+ 4,
+ 1,
+ 7,
+ },
+ ec_gf8_mul_22_ops};
+
+static ec_gf_op_t ec_gf8_mul_23_ops[] = {
+ {EC_GF_OP_COPY, 8, 2, 0}, {EC_GF_OP_XOR2, 2, 4, 0},
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 4, 0, 0},
+ {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_XOR2, 0, 3, 0},
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 3, 8, 0},
+ {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 5, 7, 0},
+ {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 1, 7, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_23 = {9,
+ {
+ 0,
+ 4,
+ 3,
+ 2,
+ 5,
+ 6,
+ 1,
+ 8,
+ 7,
+ },
+ ec_gf8_mul_23_ops};
+
+static ec_gf_op_t ec_gf8_mul_24_ops[] = {
+ {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 6, 7, 0},
+ {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_XOR2, 7, 4, 0},
+ {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 4, 0, 0},
+ {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 2, 4, 0},
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 3, 2, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_24 = {8,
+ {
+ 6,
+ 7,
+ 0,
+ 1,
+ 2,
+ 4,
+ 5,
+ 3,
+ },
+ ec_gf8_mul_24_ops};
+
+static ec_gf_op_t ec_gf8_mul_25_ops[] = {
+ {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 6, 2, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 3, 6, 0},
+ {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 1, 4, 0},
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 7, 1, 0},
+ {EC_GF_OP_XOR2, 4, 2, 0}, {EC_GF_OP_XOR2, 5, 7, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_25 = {8,
+ {
+ 2,
+ 7,
+ 0,
+ 1,
+ 3,
+ 4,
+ 5,
+ 6,
+ },
+ ec_gf8_mul_25_ops};
+
+static ec_gf_op_t ec_gf8_mul_26_ops[] = {
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 3, 6, 0},
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 2, 5, 0},
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 2, 1, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_26 = {8,
+ {
+ 3,
+ 4,
+ 1,
+ 2,
+ 0,
+ 5,
+ 6,
+ 7,
+ },
+ ec_gf8_mul_26_ops};
+
+static ec_gf_op_t ec_gf8_mul_27_ops[] = {
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 4, 1, 0},
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 4, 7, 0},
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 3, 6, 0},
+ {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 1, 3, 0},
+ {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_27 = {8,
+ {
+ 3,
+ 0,
+ 1,
+ 2,
+ 6,
+ 7,
+ 4,
+ 5,
+ },
+ ec_gf8_mul_27_ops};
+
+static ec_gf_op_t ec_gf8_mul_28_ops[] = {
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 5, 7, 0},
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 4, 5, 0},
+ {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 7, 2, 0},
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 0, 3, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_28 = {8,
+ {
+ 5,
+ 6,
+ 3,
+ 0,
+ 1,
+ 2,
+ 4,
+ 7,
+ },
+ ec_gf8_mul_28_ops};
+
+static ec_gf_op_t ec_gf8_mul_29_ops[] = {
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 3, 2, 0},
+ {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 2, 6, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 0, 3, 0},
+ {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_29 = {8,
+ {
+ 4,
+ 6,
+ 3,
+ 5,
+ 7,
+ 0,
+ 1,
+ 2,
+ },
+ ec_gf8_mul_29_ops};
+
+static ec_gf_op_t ec_gf8_mul_2A_ops[] = {
+ {EC_GF_OP_COPY, 8, 1, 0}, {EC_GF_OP_XOR2, 8, 0, 0},
+ {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 0, 2, 0},
+ {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 3, 5, 0},
+ {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_XOR2, 2, 3, 0},
+ {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_XOR2, 4, 6, 0},
+ {EC_GF_OP_XOR3, 6, 8, 4}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_2A = {9,
+ {
+ 3,
+ 4,
+ 7,
+ 2,
+ 6,
+ 5,
+ 1,
+ 0,
+ 8,
+ },
+ ec_gf8_mul_2A_ops};
+
+static ec_gf_op_t ec_gf8_mul_2B_ops[] = {
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 2, 4, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 4, 0, 0},
+ {EC_GF_OP_XOR2, 5, 0, 0}, {EC_GF_OP_XOR2, 4, 6, 0},
+ {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 7, 1, 0},
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_2B = {8,
+ {
+ 3,
+ 4,
+ 7,
+ 5,
+ 6,
+ 0,
+ 1,
+ 2,
+ },
+ ec_gf8_mul_2B_ops};
+
+static ec_gf_op_t ec_gf8_mul_2C_ops[] = {
+ {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 4, 7, 0},
+ {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 6, 4, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 3, 6, 0},
+ {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 4, 1, 0},
+ {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 1, 2, 0},
+ {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 3, 1, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_2C = {8,
+ {
+ 5,
+ 6,
+ 7,
+ 0,
+ 2,
+ 3,
+ 4,
+ 1,
+ },
+ ec_gf8_mul_2C_ops};
+
+static ec_gf_op_t ec_gf8_mul_2D_ops[] = {
+ {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 1, 3, 0},
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR3, 8, 4, 6},
+ {EC_GF_OP_XOR2, 5, 8, 0}, {EC_GF_OP_XOR2, 7, 8, 0},
+ {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 6, 2, 0}, {EC_GF_OP_XOR2, 7, 2, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_2D = {9,
+ {
+ 7,
+ 0,
+ 3,
+ 5,
+ 1,
+ 4,
+ 2,
+ 6,
+ 8,
+ },
+ ec_gf8_mul_2D_ops};
+
+static ec_gf_op_t ec_gf8_mul_2E_ops[] = {
+ {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_COPY, 8, 4, 0}, {EC_GF_OP_XOR2, 3, 6, 0},
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 8, 7, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_XOR2, 2, 8, 0}, {EC_GF_OP_XOR2, 3, 0, 0},
+ {EC_GF_OP_XOR2, 6, 8, 0}, {EC_GF_OP_XOR2, 1, 2, 0},
+ {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 0, 6, 0},
+ {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_2E = {9,
+ {
+ 5,
+ 0,
+ 7,
+ 3,
+ 2,
+ 6,
+ 4,
+ 1,
+ 8,
+ },
+ ec_gf8_mul_2E_ops};
+
+static ec_gf_op_t ec_gf8_mul_2F_ops[] = {
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 0, 3, 0},
+ {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 6, 0, 0},
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 3, 4, 0},
+ {EC_GF_OP_XOR3, 8, 7, 6}, {EC_GF_OP_XOR2, 1, 3, 0},
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 3, 8, 0},
+ {EC_GF_OP_XOR2, 2, 8, 0}, {EC_GF_OP_XOR2, 4, 5, 0},
+ {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_2F = {9,
+ {
+ 6,
+ 3,
+ 2,
+ 5,
+ 7,
+ 0,
+ 1,
+ 4,
+ 8,
+ },
+ ec_gf8_mul_2F_ops};
+
+static ec_gf_op_t ec_gf8_mul_30_ops[] = {
+ {EC_GF_OP_COPY, 8, 0, 0}, {EC_GF_OP_XOR2, 8, 1, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 7, 4, 0},
+ {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 3, 7, 0},
+ {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 2, 6, 0},
+ {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR3, 6, 8, 7},
+ {EC_GF_OP_XOR2, 7, 5, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_30 = {9,
+ {
+ 3,
+ 4,
+ 7,
+ 5,
+ 0,
+ 6,
+ 1,
+ 2,
+ 8,
+ },
+ ec_gf8_mul_30_ops};
+
+static ec_gf_op_t ec_gf8_mul_31_ops[] = {
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 3, 4, 0},
+ {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 2, 1, 0},
+ {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 5, 6, 0},
+ {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 7, 3, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 3, 2, 0},
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_31 = {8,
+ {
+ 7,
+ 1,
+ 4,
+ 5,
+ 6,
+ 0,
+ 2,
+ 3,
+ },
+ ec_gf8_mul_31_ops};
+
+static ec_gf_op_t ec_gf8_mul_32_ops[] = {
+ {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 2, 3, 0},
+ {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 4, 5, 0},
+ {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 5, 7, 0},
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_32 = {8,
+ {
+ 3,
+ 4,
+ 6,
+ 7,
+ 5,
+ 0,
+ 1,
+ 2,
+ },
+ ec_gf8_mul_32_ops};
+
+static ec_gf_op_t ec_gf8_mul_33_ops[] = {
+ {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 2, 1, 0},
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 6, 2, 0},
+ {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 2, 4, 0},
+ {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 0, 6, 0},
+ {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_33 = {8,
+ {
+ 5,
+ 4,
+ 3,
+ 0,
+ 2,
+ 1,
+ 6,
+ 7,
+ },
+ ec_gf8_mul_33_ops};
+
+static ec_gf_op_t ec_gf8_mul_34_ops[] = {
+ {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 6, 4, 0},
+ {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 4, 0, 0},
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_XOR2, 6, 2, 0}, {EC_GF_OP_XOR2, 4, 1, 0},
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 1, 2, 0},
+ {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 2, 3, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_34 = {8,
+ {
+ 7,
+ 5,
+ 3,
+ 0,
+ 2,
+ 4,
+ 1,
+ 6,
+ },
+ ec_gf8_mul_34_ops};
+
+static ec_gf_op_t ec_gf8_mul_35_ops[] = {
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 1, 4, 0},
+ {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 3, 7, 0},
+ {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 7, 5, 0},
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 4, 7, 0},
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 6, 0, 0},
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_XOR2, 3, 1, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_35 = {8,
+ {
+ 6,
+ 7,
+ 5,
+ 4,
+ 2,
+ 0,
+ 1,
+ 3,
+ },
+ ec_gf8_mul_35_ops};
+
+static ec_gf_op_t ec_gf8_mul_36_ops[] = {
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 4, 2, 0},
+ {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 7, 5, 0},
+ {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 3, 0, 0},
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_36 = {8,
+ {
+ 6,
+ 7,
+ 4,
+ 1,
+ 2,
+ 3,
+ 0,
+ 5,
+ },
+ ec_gf8_mul_36_ops};
+
+static ec_gf_op_t ec_gf8_mul_37_ops[] = {
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 0, 2, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR3, 8, 0, 1},
+ {EC_GF_OP_XOR2, 3, 8, 0}, {EC_GF_OP_XOR2, 7, 8, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 3, 4, 0},
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 4, 6, 0},
+ {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 5, 7, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_37 = {9,
+ {
+ 6,
+ 7,
+ 2,
+ 1,
+ 0,
+ 3,
+ 4,
+ 5,
+ 8,
+ },
+ ec_gf8_mul_37_ops};
+
+static ec_gf_op_t ec_gf8_mul_38_ops[] = {
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 5, 6, 0},
+ {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 3, 0, 0},
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 7, 5, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR3, 8, 6, 7},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 7, 1, 0},
+ {EC_GF_OP_XOR2, 0, 8, 0}, {EC_GF_OP_XOR2, 4, 8, 0},
+ {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_38 = {9,
+ {
+ 4,
+ 5,
+ 6,
+ 3,
+ 0,
+ 1,
+ 7,
+ 2,
+ 8,
+ },
+ ec_gf8_mul_38_ops};
+
+static ec_gf_op_t ec_gf8_mul_39_ops[] = {
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 4, 5, 0},
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 3, 0, 0},
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 3, 5, 0},
+ {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 4, 7, 0},
+ {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_39 = {8,
+ {
+ 1,
+ 6,
+ 3,
+ 0,
+ 5,
+ 2,
+ 4,
+ 7,
+ },
+ ec_gf8_mul_39_ops};
+
+static ec_gf_op_t ec_gf8_mul_3A_ops[] = {
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 7, 3, 0},
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 3, 4, 0},
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 4, 6, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 6, 0, 0},
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 7, 4, 0},
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 4, 5, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_3A = {8,
+ {
+ 3,
+ 4,
+ 7,
+ 0,
+ 5,
+ 6,
+ 1,
+ 2,
+ },
+ ec_gf8_mul_3A_ops};
+
+static ec_gf_op_t ec_gf8_mul_3B_ops[] = {
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 7, 2, 0},
+ {EC_GF_OP_XOR3, 8, 7, 3}, {EC_GF_OP_XOR2, 1, 6, 0},
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 5, 1, 0},
+ {EC_GF_OP_XOR2, 1, 8, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 4, 1, 0},
+ {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_3B = {9,
+ {
+ 3,
+ 0,
+ 1,
+ 7,
+ 6,
+ 2,
+ 4,
+ 8,
+ 5,
+ },
+ ec_gf8_mul_3B_ops};
+
+static ec_gf_op_t ec_gf8_mul_3C_ops[] = {
+ {EC_GF_OP_XOR2, 7, 5, 0}, {EC_GF_OP_XOR2, 7, 4, 0},
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 6, 7, 0},
+ {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 3, 6, 0},
+ {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 1, 4, 0},
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 4, 0, 0},
+ {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_3C = {8,
+ {
+ 3,
+ 6,
+ 4,
+ 1,
+ 7,
+ 2,
+ 0,
+ 5,
+ },
+ ec_gf8_mul_3C_ops};
+
+static ec_gf_op_t ec_gf8_mul_3D_ops[] = {
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 3, 2, 0},
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 6, 5, 0},
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 1, 0, 0},
+ {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 5, 1, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_3D = {8,
+ {
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 0,
+ 1,
+ },
+ ec_gf8_mul_3D_ops};
+
+static ec_gf_op_t ec_gf8_mul_3E_ops[] = {
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 4, 2, 0},
+ {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 1, 4, 0},
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 0, 6, 0},
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 7, 3, 0},
+ {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_3E = {8,
+ {
+ 6,
+ 1,
+ 2,
+ 7,
+ 0,
+ 3,
+ 5,
+ 4,
+ },
+ ec_gf8_mul_3E_ops};
+
+static ec_gf_op_t ec_gf8_mul_3F_ops[] = {
+ {EC_GF_OP_COPY, 8, 0, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_COPY, 10, 4, 0}, {EC_GF_OP_XOR2, 0, 6, 0},
+ {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 4, 0, 0},
+ {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_COPY, 9, 2, 0},
+ {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR3, 4, 9, 7},
+ {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 6, 5, 0},
+ {EC_GF_OP_XOR2, 3, 10, 0}, {EC_GF_OP_XOR2, 5, 8, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_3F = {11,
+ {
+ 1,
+ 7,
+ 6,
+ 2,
+ 4,
+ 3,
+ 5,
+ 0,
+ 8,
+ 9,
+ 10,
+ },
+ ec_gf8_mul_3F_ops};
+
+static ec_gf_op_t ec_gf8_mul_40_ops[] = {
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 2, 3, 0},
+ {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 4, 5, 0},
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 0, 6, 0},
+ {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 6, 2, 0},
+ {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR3, 8, 7, 6},
+ {EC_GF_OP_XOR2, 1, 8, 0}, {EC_GF_OP_XOR2, 5, 8, 0},
+ {EC_GF_OP_XOR2, 4, 8, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_40 = {9,
+ {
+ 5,
+ 7,
+ 4,
+ 6,
+ 2,
+ 3,
+ 0,
+ 1,
+ 8,
+ },
+ ec_gf8_mul_40_ops};
+
+static ec_gf_op_t ec_gf8_mul_41_ops[] = {
+ {EC_GF_OP_COPY, 8, 0, 0}, {EC_GF_OP_XOR2, 8, 4, 0},
+ {EC_GF_OP_XOR2, 8, 5, 0}, {EC_GF_OP_XOR2, 5, 6, 0},
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 6, 7, 0},
+ {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 7, 1, 0},
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 3, 2, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_41 = {9,
+ {
+ 0,
+ 7,
+ 6,
+ 5,
+ 3,
+ 4,
+ 8,
+ 1,
+ 2,
+ },
+ ec_gf8_mul_41_ops};
+
+static ec_gf_op_t ec_gf8_mul_42_ops[] = {
+ {EC_GF_OP_COPY, 8, 0, 0}, {EC_GF_OP_XOR2, 0, 2, 0},
+ {EC_GF_OP_XOR2, 8, 3, 0}, {EC_GF_OP_XOR2, 2, 6, 0},
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 4, 2, 0},
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 1, 4, 0},
+ {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 4, 6, 0},
+ {EC_GF_OP_XOR2, 7, 8, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_42 = {9,
+ {
+ 2,
+ 7,
+ 1,
+ 6,
+ 4,
+ 3,
+ 0,
+ 5,
+ 8,
+ },
+ ec_gf8_mul_42_ops};
+
+static ec_gf_op_t ec_gf8_mul_43_ops[] = {
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 1, 6, 0},
+ {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 1, 7, 0},
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 2, 6, 0},
+ {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_43 = {8,
+ {
+ 2,
+ 6,
+ 4,
+ 1,
+ 7,
+ 3,
+ 0,
+ 5,
+ },
+ ec_gf8_mul_43_ops};
+
+static ec_gf_op_t ec_gf8_mul_44_ops[] = {
+ {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_XOR2, 4, 7, 0},
+ {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 5, 4, 0},
+ {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 4, 2, 0},
+ {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 2, 7, 0},
+ {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 7, 1, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 1, 6, 0},
+ {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_44 = {8,
+ {
+ 2,
+ 3,
+ 4,
+ 1,
+ 6,
+ 5,
+ 0,
+ 7,
+ },
+ ec_gf8_mul_44_ops};
+
+static ec_gf_op_t ec_gf8_mul_45_ops[] = {
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 4, 7, 0},
+ {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_XOR2, 7, 3, 0},
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 3, 1, 0},
+ {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_45 = {8,
+ {
+ 2,
+ 3,
+ 0,
+ 1,
+ 7,
+ 4,
+ 5,
+ 6,
+ },
+ ec_gf8_mul_45_ops};
+
+static ec_gf_op_t ec_gf8_mul_46_ops[] = {
+ {EC_GF_OP_XOR3, 8, 2, 4}, {EC_GF_OP_XOR2, 4, 6, 0},
+ {EC_GF_OP_XOR2, 8, 0, 0}, {EC_GF_OP_XOR2, 6, 0, 0},
+ {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 3, 1, 0},
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 5, 7, 0},
+ {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 1, 8, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_46 = {9,
+ {
+ 2,
+ 0,
+ 1,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 8,
+ },
+ ec_gf8_mul_46_ops};
+
+static ec_gf_op_t ec_gf8_mul_47_ops[] = {
+ {EC_GF_OP_XOR3, 8, 0, 1}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_XOR2, 3, 8, 0}, {EC_GF_OP_XOR2, 5, 1, 0},
+ {EC_GF_OP_XOR2, 4, 8, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_47 = {9,
+ {
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 0,
+ 1,
+ 8,
+ },
+ ec_gf8_mul_47_ops};
+
+static ec_gf_op_t ec_gf8_mul_48_ops[] = {
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 5, 4, 0},
+ {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 7, 6, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 4, 7, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 1, 3, 0},
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 7, 0, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_48 = {8,
+ {
+ 4,
+ 5,
+ 6,
+ 0,
+ 1,
+ 3,
+ 7,
+ 2,
+ },
+ ec_gf8_mul_48_ops};
+
+static ec_gf_op_t ec_gf8_mul_49_ops[] = {
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 0, 2, 0},
+ {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR3, 8, 0, 6},
+ {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 7, 8, 0},
+ {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 3, 7, 0},
+ {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 2, 5, 0},
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR3, 1, 8, 5},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_49 = {9,
+ {
+ 7,
+ 2,
+ 4,
+ 0,
+ 3,
+ 5,
+ 1,
+ 6,
+ 8,
+ },
+ ec_gf8_mul_49_ops};
+
+static ec_gf_op_t ec_gf8_mul_4A_ops[] = {
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 1, 4, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 5, 2, 0},
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 0, 3, 0},
+ {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 6, 0, 0},
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 2, 7, 0},
+ {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_4A = {8,
+ {
+ 5,
+ 6,
+ 7,
+ 0,
+ 1,
+ 3,
+ 4,
+ 2,
+ },
+ ec_gf8_mul_4A_ops};
+
+static ec_gf_op_t ec_gf8_mul_4B_ops[] = {
+ {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 7, 0, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 1, 4, 0},
+ {EC_GF_OP_XOR3, 8, 3, 7}, {EC_GF_OP_XOR2, 3, 6, 0},
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 4, 8, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 5, 8, 0},
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_4B = {9,
+ {
+ 5,
+ 3,
+ 6,
+ 7,
+ 0,
+ 2,
+ 4,
+ 1,
+ 8,
+ },
+ ec_gf8_mul_4B_ops};
+
+static ec_gf_op_t ec_gf8_mul_4C_ops[] = {
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 2, 3, 0},
+ {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 1, 2, 0},
+ {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 6, 4, 0},
+ {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 4, 0, 0},
+ {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_XOR2, 2, 5, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 1, 2, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_4C = {8,
+ {
+ 5,
+ 3,
+ 4,
+ 7,
+ 0,
+ 6,
+ 2,
+ 1,
+ },
+ ec_gf8_mul_4C_ops};
+
+static ec_gf_op_t ec_gf8_mul_4D_ops[] = {
+ {EC_GF_OP_COPY, 8, 3, 0}, {EC_GF_OP_XOR2, 3, 4, 0},
+ {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR3, 9, 3, 1},
+ {EC_GF_OP_XOR2, 5, 9, 0}, {EC_GF_OP_XOR2, 4, 2, 0},
+ {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 0, 6, 0},
+ {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 3, 0, 0},
+ {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_XOR3, 0, 8, 2},
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_4D = {10,
+ {
+ 0,
+ 9,
+ 3,
+ 5,
+ 6,
+ 4,
+ 7,
+ 1,
+ 2,
+ 8,
+ },
+ ec_gf8_mul_4D_ops};
+
+static ec_gf_op_t ec_gf8_mul_4E_ops[] = {
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 4, 1, 0},
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 4, 7, 0},
+ {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 5, 4, 0},
+ {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_XOR2, 6, 2, 0}, {EC_GF_OP_XOR2, 7, 3, 0},
+ {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_4E = {8,
+ {
+ 2,
+ 3,
+ 0,
+ 1,
+ 5,
+ 6,
+ 7,
+ 4,
+ },
+ ec_gf8_mul_4E_ops};
+
+static ec_gf_op_t ec_gf8_mul_4F_ops[] = {
+ {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 1, 0, 0},
+ {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 0, 2, 0},
+ {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_XOR2, 2, 6, 0},
+ {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 5, 4, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_4F = {8,
+ {
+ 0,
+ 3,
+ 5,
+ 6,
+ 1,
+ 2,
+ 7,
+ 4,
+ },
+ ec_gf8_mul_4F_ops};
+
+static ec_gf_op_t ec_gf8_mul_50_ops[] = {
+ {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 4, 6, 0},
+ {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 5, 7, 0},
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 0, 2, 0},
+ {EC_GF_OP_XOR2, 4, 7, 0}, {EC_GF_OP_XOR2, 0, 6, 0},
+ {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 1, 2, 0},
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_50 = {8,
+ {
+ 4,
+ 5,
+ 7,
+ 3,
+ 0,
+ 1,
+ 2,
+ 6,
+ },
+ ec_gf8_mul_50_ops};
+
+static ec_gf_op_t ec_gf8_mul_51_ops[] = {
+ {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 3, 5, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 3, 7, 0},
+ {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 2, 4, 0},
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_51 = {8,
+ {
+ 0,
+ 1,
+ 7,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ },
+ ec_gf8_mul_51_ops};
+
+static ec_gf_op_t ec_gf8_mul_52_ops[] = {
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_COPY, 8, 0, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_COPY, 9, 4, 0},
+ {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 3, 1, 0},
+ {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR3, 3, 5, 8},
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 2, 9, 0},
+ {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 3, 1, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_52 = {10,
+ {
+ 2,
+ 3,
+ 1,
+ 4,
+ 6,
+ 7,
+ 0,
+ 5,
+ 8,
+ 9,
+ },
+ ec_gf8_mul_52_ops};
+
+static ec_gf_op_t ec_gf8_mul_53_ops[] = {
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 3, 1, 0},
+ {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 2, 4, 0},
+ {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 3, 5, 0},
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 5, 2, 0},
+ {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 0, 3, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_53 = {8,
+ {
+ 2,
+ 0,
+ 1,
+ 4,
+ 5,
+ 6,
+ 7,
+ 3,
+ },
+ ec_gf8_mul_53_ops};
+
+static ec_gf_op_t ec_gf8_mul_54_ops[] = {
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 4, 1, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 1, 3, 0},
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 1, 6, 0},
+ {EC_GF_OP_XOR2, 5, 0, 0}, {EC_GF_OP_XOR2, 0, 6, 0},
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 4, 2, 0},
+ {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_54 = {8,
+ {
+ 7,
+ 3,
+ 0,
+ 4,
+ 2,
+ 6,
+ 5,
+ 1,
+ },
+ ec_gf8_mul_54_ops};
+
+static ec_gf_op_t ec_gf8_mul_55_ops[] = {
+ {EC_GF_OP_XOR2, 4, 2, 0}, {EC_GF_OP_XOR2, 7, 0, 0},
+ {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 3, 1, 0},
+ {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 2, 5, 0},
+ {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 1, 7, 0},
+ {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 7, 2, 0},
+ {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 3, 7, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_55 = {8,
+ {
+ 1,
+ 5,
+ 6,
+ 4,
+ 3,
+ 7,
+ 2,
+ 0,
+ },
+ ec_gf8_mul_55_ops};
+
+static ec_gf_op_t ec_gf8_mul_56_ops[] = {
+ {EC_GF_OP_XOR2, 5, 0, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 1, 3, 0},
+ {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 3, 5, 0},
+ {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 4, 7, 0},
+ {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 0, 6, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_56 = {8,
+ {
+ 2,
+ 3,
+ 0,
+ 4,
+ 5,
+ 6,
+ 7,
+ 1,
+ },
+ ec_gf8_mul_56_ops};
+
+static ec_gf_op_t ec_gf8_mul_57_ops[] = {
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 0, 6, 0},
+ {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 6, 7, 0},
+ {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 6, 2, 0},
+ {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 2, 4, 0},
+ {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 4, 5, 0},
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_57 = {8,
+ {
+ 2,
+ 3,
+ 0,
+ 1,
+ 4,
+ 5,
+ 6,
+ 7,
+ },
+ ec_gf8_mul_57_ops};
+
+static ec_gf_op_t ec_gf8_mul_58_ops[] = {
+ {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 2, 5, 0},
+ {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 5, 4, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 1, 4, 0},
+ {EC_GF_OP_XOR2, 7, 5, 0}, {EC_GF_OP_XOR2, 0, 3, 0},
+ {EC_GF_OP_XOR2, 6, 2, 0}, {EC_GF_OP_XOR2, 3, 6, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_58 = {8,
+ {
+ 4,
+ 3,
+ 2,
+ 7,
+ 0,
+ 1,
+ 5,
+ 6,
+ },
+ ec_gf8_mul_58_ops};
+
+static ec_gf_op_t ec_gf8_mul_59_ops[] = {
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 1, 3, 0},
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 2, 4, 0},
+ {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 2, 1, 0},
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 3, 0, 0},
+ {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 6, 7, 0},
+ {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_XOR2, 3, 1, 0},
+ {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_59 = {8,
+ {
+ 7,
+ 3,
+ 5,
+ 6,
+ 1,
+ 2,
+ 0,
+ 4,
+ },
+ ec_gf8_mul_59_ops};
+
+static ec_gf_op_t ec_gf8_mul_5A_ops[] = {
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 2, 1, 0},
+ {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 3, 2, 0},
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 5, 1, 0},
+ {EC_GF_OP_XOR2, 7, 5, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_5A = {8,
+ {
+ 6,
+ 7,
+ 0,
+ 1,
+ 2,
+ 3,
+ 5,
+ 4,
+ },
+ ec_gf8_mul_5A_ops};
+
+static ec_gf_op_t ec_gf8_mul_5B_ops[] = {
+ {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 2, 5, 0},
+ {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 6, 2, 0},
+ {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 2, 1, 0},
+ {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 4, 7, 0},
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_5B = {8,
+ {
+ 6,
+ 0,
+ 7,
+ 5,
+ 2,
+ 1,
+ 3,
+ 4,
+ },
+ ec_gf8_mul_5B_ops};
+
+static ec_gf_op_t ec_gf8_mul_5C_ops[] = {
+ {EC_GF_OP_COPY, 8, 3, 0}, {EC_GF_OP_XOR2, 3, 6, 0},
+ {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 4, 2, 0},
+ {EC_GF_OP_XOR2, 6, 2, 0}, {EC_GF_OP_XOR2, 7, 4, 0},
+ {EC_GF_OP_XOR2, 2, 8, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_5C = {9,
+ {
+ 7,
+ 5,
+ 2,
+ 4,
+ 1,
+ 0,
+ 6,
+ 3,
+ 8,
+ },
+ ec_gf8_mul_5C_ops};
+
+static ec_gf_op_t ec_gf8_mul_5D_ops[] = {
+ {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 1, 0, 0},
+ {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 6, 4, 0},
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 7, 6, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 7, 2, 0},
+ {EC_GF_OP_XOR2, 4, 2, 0}, {EC_GF_OP_XOR2, 1, 7, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_5D = {8,
+ {
+ 1,
+ 3,
+ 5,
+ 4,
+ 6,
+ 7,
+ 2,
+ 0,
+ },
+ ec_gf8_mul_5D_ops};
+
+static ec_gf_op_t ec_gf8_mul_5E_ops[] = {
+ {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_XOR2, 6, 5, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 5, 2, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 7, 1, 0},
+ {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 0, 2, 0},
+ {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 2, 4, 0},
+ {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 1, 2, 0},
+ {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 2, 6, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_5E = {8,
+ {
+ 4,
+ 3,
+ 6,
+ 2,
+ 5,
+ 7,
+ 0,
+ 1,
+ },
+ ec_gf8_mul_5E_ops};
+
+static ec_gf_op_t ec_gf8_mul_5F_ops[] = {
+ {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 1, 0, 0},
+ {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 3, 7, 0},
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 5, 4, 0},
+ {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_5F = {8,
+ {
+ 6,
+ 1,
+ 3,
+ 4,
+ 5,
+ 7,
+ 2,
+ 0,
+ },
+ ec_gf8_mul_5F_ops};
+
+static ec_gf_op_t ec_gf8_mul_60_ops[] = {
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 7, 4, 0},
+ {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 4, 6, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 6, 0, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 7, 5, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_60 = {8,
+ {
+ 2,
+ 3,
+ 4,
+ 7,
+ 5,
+ 6,
+ 0,
+ 1,
+ },
+ ec_gf8_mul_60_ops};
+
+static ec_gf_op_t ec_gf8_mul_61_ops[] = {
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 6, 2, 0},
+ {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 4, 2, 0},
+ {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 3, 4, 0},
+ {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 7, 3, 0},
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 5, 1, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 3, 5, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_61 = {8,
+ {
+ 0,
+ 5,
+ 6,
+ 7,
+ 4,
+ 2,
+ 1,
+ 3,
+ },
+ ec_gf8_mul_61_ops};
+
+static ec_gf_op_t ec_gf8_mul_62_ops[] = {
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 1, 2, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 3, 4, 0},
+ {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 0, 3, 0},
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 7, 0, 0},
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 7, 1, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 3, 1, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_62 = {8,
+ {
+ 2,
+ 0,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 1,
+ },
+ ec_gf8_mul_62_ops};
+
+static ec_gf_op_t ec_gf8_mul_63_ops[] = {
+ {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 5, 4, 0},
+ {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 1, 7, 0},
+ {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 3, 0, 0},
+ {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 7, 5, 0},
+ {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 2, 4, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 4, 0, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_63 = {8,
+ {
+ 3,
+ 4,
+ 6,
+ 5,
+ 7,
+ 0,
+ 1,
+ 2,
+ },
+ ec_gf8_mul_63_ops};
+
+static ec_gf_op_t ec_gf8_mul_64_ops[] = {
+ {EC_GF_OP_COPY, 8, 1, 0}, {EC_GF_OP_XOR2, 8, 0, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 8, 7, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 3, 4, 0},
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 4, 5, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 5, 7, 0},
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 7, 0, 0},
+ {EC_GF_OP_XOR2, 4, 2, 0}, {EC_GF_OP_XOR2, 4, 0, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_64 = {9,
+ {
+ 2,
+ 3,
+ 4,
+ 6,
+ 5,
+ 7,
+ 8,
+ 1,
+ 0,
+ },
+ ec_gf8_mul_64_ops};
+
+static ec_gf_op_t ec_gf8_mul_65_ops[] = {
+ {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 7, 2, 0},
+ {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 2, 3, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 3, 4, 0},
+ {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 6, 0, 0},
+ {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 5, 1, 0},
+ {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_65 = {8,
+ {
+ 2,
+ 5,
+ 1,
+ 3,
+ 4,
+ 0,
+ 6,
+ 7,
+ },
+ ec_gf8_mul_65_ops};
+
+static ec_gf_op_t ec_gf8_mul_66_ops[] = {
+ {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 2, 3, 0},
+ {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 5, 7, 0},
+ {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 7, 4, 0},
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 4, 0, 0},
+ {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_66 = {8,
+ {
+ 2,
+ 3,
+ 1,
+ 4,
+ 5,
+ 7,
+ 0,
+ 6,
+ },
+ ec_gf8_mul_66_ops};
+
+static ec_gf_op_t ec_gf8_mul_67_ops[] = {
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 3, 1, 0},
+ {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 5, 7, 0},
+ {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_XOR2, 6, 0, 0},
+ {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 1, 3, 0},
+ {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 2, 7, 0},
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 2, 3, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_67 = {8,
+ {
+ 2,
+ 4,
+ 5,
+ 6,
+ 7,
+ 3,
+ 1,
+ 0,
+ },
+ ec_gf8_mul_67_ops};
+
+static ec_gf_op_t ec_gf8_mul_68_ops[] = {
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 5, 4, 0},
+ {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 6, 4, 0},
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 4, 0, 0},
+ {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_XOR2, 4, 1, 0},
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 5, 6, 0},
+ {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_68 = {8,
+ {
+ 5,
+ 7,
+ 2,
+ 3,
+ 0,
+ 6,
+ 4,
+ 1,
+ },
+ ec_gf8_mul_68_ops};
+
+static ec_gf_op_t ec_gf8_mul_69_ops[] = {
+ {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 4, 7, 0},
+ {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 2, 1, 0},
+ {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 4, 2, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 5, 4, 0},
+ {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 6, 5, 0},
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 5, 7, 0},
+ {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_69 = {8,
+ {
+ 0,
+ 1,
+ 3,
+ 2,
+ 4,
+ 5,
+ 7,
+ 6,
+ },
+ ec_gf8_mul_69_ops};
+
+static ec_gf_op_t ec_gf8_mul_6A_ops[] = {
+ {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 3, 5, 0},
+ {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 2, 6, 0},
+ {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_XOR2, 5, 2, 0},
+ {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 5, 7, 0},
+ {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 7, 6, 0},
+ {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 2, 7, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_6A = {8,
+ {
+ 5,
+ 7,
+ 4,
+ 6,
+ 1,
+ 2,
+ 0,
+ 3,
+ },
+ ec_gf8_mul_6A_ops};
+
+static ec_gf_op_t ec_gf8_mul_6B_ops[] = {
+ {EC_GF_OP_COPY, 8, 1, 0}, {EC_GF_OP_XOR2, 1, 2, 0},
+ {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_XOR2, 3, 4, 0},
+ {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 2, 3, 0},
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 4, 1, 0},
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 4, 0, 0},
+ {EC_GF_OP_XOR2, 0, 8, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_6B = {9,
+ {
+ 6,
+ 7,
+ 2,
+ 0,
+ 3,
+ 1,
+ 5,
+ 4,
+ 8,
+ },
+ ec_gf8_mul_6B_ops};
+
+static ec_gf_op_t ec_gf8_mul_6C_ops[] = {
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 4, 2, 0},
+ {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 3, 0, 0},
+ {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_6C = {8,
+ {
+ 5,
+ 6,
+ 7,
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ },
+ ec_gf8_mul_6C_ops};
+
+static ec_gf_op_t ec_gf8_mul_6D_ops[] = {
+ {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 1, 0, 0},
+ {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 0, 2, 0},
+ {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 2, 7, 0},
+ {EC_GF_OP_XOR3, 8, 3, 4}, {EC_GF_OP_XOR2, 7, 1, 0},
+ {EC_GF_OP_XOR2, 5, 0, 0}, {EC_GF_OP_XOR2, 1, 6, 0},
+ {EC_GF_OP_XOR2, 0, 8, 0}, {EC_GF_OP_XOR2, 3, 5, 0},
+ {EC_GF_OP_XOR2, 6, 8, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_6D = {9,
+ {
+ 3,
+ 6,
+ 7,
+ 0,
+ 4,
+ 5,
+ 1,
+ 2,
+ 8,
+ },
+ ec_gf8_mul_6D_ops};
+
+static ec_gf_op_t ec_gf8_mul_6E_ops[] = {
+ {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 1, 2, 0},
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 7, 3, 0},
+ {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 2, 4, 0},
+ {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 4, 6, 0},
+ {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 5, 1, 0},
+ {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 1, 3, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_6E = {8,
+ {
+ 5,
+ 6,
+ 3,
+ 1,
+ 7,
+ 2,
+ 0,
+ 4,
+ },
+ ec_gf8_mul_6E_ops};
+
+static ec_gf_op_t ec_gf8_mul_6F_ops[] = {
+ {EC_GF_OP_COPY, 8, 0, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 3, 0, 0},
+ {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 7, 3, 0},
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR3, 0, 8, 7}, {EC_GF_OP_XOR2, 1, 2, 0},
+ {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 5, 2, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_6F = {9,
+ {
+ 2,
+ 6,
+ 3,
+ 7,
+ 0,
+ 1,
+ 4,
+ 5,
+ 8,
+ },
+ ec_gf8_mul_6F_ops};
+
+static ec_gf_op_t ec_gf8_mul_70_ops[] = {
+ {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 4, 2, 0},
+ {EC_GF_OP_XOR2, 7, 5, 0}, {EC_GF_OP_XOR2, 0, 2, 0},
+ {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 4, 7, 0},
+ {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_XOR2, 7, 1, 0},
+ {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_70 = {8,
+ {
+ 3,
+ 4,
+ 5,
+ 2,
+ 6,
+ 0,
+ 1,
+ 7,
+ },
+ ec_gf8_mul_70_ops};
+
+static ec_gf_op_t ec_gf8_mul_71_ops[] = {
+ {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 6, 2, 0}, {EC_GF_OP_XOR2, 3, 2, 0},
+ {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 1, 3, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_71 = {8,
+ {
+ 4,
+ 7,
+ 5,
+ 3,
+ 6,
+ 0,
+ 2,
+ 1,
+ },
+ ec_gf8_mul_71_ops};
+
+static ec_gf_op_t ec_gf8_mul_72_ops[] = {
+ {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 3, 7, 0},
+ {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 4, 1, 0},
+ {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 6, 2, 0},
+ {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_72 = {8,
+ {
+ 0,
+ 5,
+ 2,
+ 7,
+ 4,
+ 1,
+ 3,
+ 6,
+ },
+ ec_gf8_mul_72_ops};
+
+static ec_gf_op_t ec_gf8_mul_73_ops[] = {
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 2, 1, 0},
+ {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 1, 7, 0},
+ {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 6, 2, 0},
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 3, 6, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 6, 0, 0},
+ {EC_GF_OP_XOR2, 5, 0, 0}, {EC_GF_OP_XOR2, 4, 6, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_73 = {8,
+ {
+ 6,
+ 0,
+ 1,
+ 7,
+ 4,
+ 5,
+ 2,
+ 3,
+ },
+ ec_gf8_mul_73_ops};
+
+static ec_gf_op_t ec_gf8_mul_74_ops[] = {
+ {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 2, 5, 0},
+ {EC_GF_OP_XOR2, 5, 0, 0}, {EC_GF_OP_XOR2, 5, 1, 0},
+ {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 7, 1, 0},
+ {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_XOR2, 3, 4, 0},
+ {EC_GF_OP_XOR2, 6, 2, 0}, {EC_GF_OP_XOR2, 4, 0, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 0, 6, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_74 = {8,
+ {
+ 3,
+ 2,
+ 1,
+ 0,
+ 4,
+ 5,
+ 6,
+ 7,
+ },
+ ec_gf8_mul_74_ops};
+
+static ec_gf_op_t ec_gf8_mul_75_ops[] = {
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 3, 2, 0},
+ {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 1, 0, 0},
+ {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 3, 1, 0},
+ {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 7, 6, 0},
+ {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 5, 4, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_75 = {8,
+ {
+ 4,
+ 5,
+ 6,
+ 7,
+ 0,
+ 1,
+ 2,
+ 3,
+ },
+ ec_gf8_mul_75_ops};
+
+static ec_gf_op_t ec_gf8_mul_76_ops[] = {
+ {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_XOR3, 8, 6, 2},
+ {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_XOR2, 2, 4, 0},
+ {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 0, 8, 0},
+ {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 1, 4, 0},
+ {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_76 = {9,
+ {
+ 2,
+ 3,
+ 0,
+ 6,
+ 5,
+ 1,
+ 7,
+ 8,
+ 4,
+ },
+ ec_gf8_mul_76_ops};
+
+static ec_gf_op_t ec_gf8_mul_77_ops[] = {
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 5, 1, 0},
+ {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 0, 3, 0},
+ {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 3, 5, 0},
+ {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 5, 2, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 2, 6, 0},
+ {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 7, 2, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_77 = {8,
+ {
+ 7,
+ 4,
+ 3,
+ 6,
+ 0,
+ 1,
+ 5,
+ 2,
+ },
+ ec_gf8_mul_77_ops};
+
+static ec_gf_op_t ec_gf8_mul_78_ops[] = {
+ {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 6, 0, 0},
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 7, 2, 0},
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 0, 3, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR3, 8, 0, 2},
+ {EC_GF_OP_XOR2, 4, 8, 0}, {EC_GF_OP_XOR2, 1, 8, 0},
+ {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 5, 7, 0},
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_78 = {9,
+ {
+ 4,
+ 7,
+ 3,
+ 2,
+ 5,
+ 1,
+ 6,
+ 0,
+ 8,
+ },
+ ec_gf8_mul_78_ops};
+
+static ec_gf_op_t ec_gf8_mul_79_ops[] = {
+ {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR3, 8, 4, 7},
+ {EC_GF_OP_XOR2, 0, 8, 0}, {EC_GF_OP_XOR2, 2, 1, 0},
+ {EC_GF_OP_XOR2, 6, 8, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_XOR2, 3, 2, 0},
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_79 = {9,
+ {
+ 4,
+ 5,
+ 7,
+ 3,
+ 1,
+ 6,
+ 2,
+ 0,
+ 8,
+ },
+ ec_gf8_mul_79_ops};
+
+static ec_gf_op_t ec_gf8_mul_7A_ops[] = {
+ {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 3, 2, 0},
+ {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 6, 5, 0},
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_XOR2, 1, 0, 0},
+ {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_7A = {8,
+ {
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 0,
+ },
+ ec_gf8_mul_7A_ops};
+
+static ec_gf_op_t ec_gf8_mul_7B_ops[] = {
+ {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR3, 8, 5, 3},
+ {EC_GF_OP_XOR2, 8, 0, 0}, {EC_GF_OP_COPY, 9, 4, 0},
+ {EC_GF_OP_XOR2, 8, 2, 0}, {EC_GF_OP_XOR2, 4, 6, 0},
+ {EC_GF_OP_XOR2, 4, 8, 0}, {EC_GF_OP_XOR2, 7, 4, 0},
+ {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_XOR3, 4, 1, 9},
+ {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 1, 7, 0},
+ {EC_GF_OP_XOR2, 4, 2, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_7B = {10,
+ {
+ 1,
+ 2,
+ 3,
+ 4,
+ 8,
+ 5,
+ 6,
+ 0,
+ 7,
+ 9,
+ },
+ ec_gf8_mul_7B_ops};
+
+static ec_gf_op_t ec_gf8_mul_7C_ops[] = {
+ {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 4, 5, 0},
+ {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 4, 6, 0},
+ {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 3, 0, 0},
+ {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 5, 7, 0},
+ {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 6, 5, 0},
+ {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_7C = {8,
+ {
+ 2,
+ 4,
+ 1,
+ 6,
+ 3,
+ 5,
+ 7,
+ 0,
+ },
+ ec_gf8_mul_7C_ops};
+
+static ec_gf_op_t ec_gf8_mul_7D_ops[] = {
+ {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 3, 2, 0},
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 1, 0, 0},
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 3, 7, 0},
+ {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 2, 3, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 5, 2, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_7D = {8,
+ {
+ 1,
+ 0,
+ 3,
+ 5,
+ 6,
+ 7,
+ 2,
+ 4,
+ },
+ ec_gf8_mul_7D_ops};
+
+static ec_gf_op_t ec_gf8_mul_7E_ops[] = {
+ {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_COPY, 8, 0, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_XOR2, 5, 6, 0},
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 7, 6, 0},
+ {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_XOR3, 6, 2, 7},
+ {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 2, 5, 0},
+ {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_XOR2, 6, 8, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_7E = {9,
+ {
+ 5,
+ 1,
+ 2,
+ 0,
+ 7,
+ 3,
+ 4,
+ 6,
+ 8,
+ },
+ ec_gf8_mul_7E_ops};
+
+static ec_gf_op_t ec_gf8_mul_7F_ops[] = {
+ {EC_GF_OP_COPY, 8, 0, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_XOR3, 9, 7, 5}, {EC_GF_OP_XOR2, 2, 9, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 0, 6, 0},
+ {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 6, 9, 0},
+ {EC_GF_OP_XOR3, 9, 6, 4}, {EC_GF_OP_XOR2, 7, 9, 0},
+ {EC_GF_OP_XOR2, 3, 9, 0}, {EC_GF_OP_XOR2, 1, 7, 0},
+ {EC_GF_OP_XOR2, 7, 8, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_7F = {10,
+ {
+ 4,
+ 1,
+ 0,
+ 5,
+ 6,
+ 7,
+ 2,
+ 3,
+ 8,
+ 9,
+ },
+ ec_gf8_mul_7F_ops};
+
+static ec_gf_op_t ec_gf8_mul_80_ops[] = {
+ {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 3, 4, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 2, 3, 0},
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 5, 1, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 4, 5, 0},
+ {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 6, 4, 0},
+ {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 6, 2, 0},
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 5, 7, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_80 = {8,
+ {
+ 7,
+ 5,
+ 6,
+ 4,
+ 1,
+ 2,
+ 3,
+ 0,
+ },
+ ec_gf8_mul_80_ops};
+
+static ec_gf_op_t ec_gf8_mul_81_ops[] = {
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 4, 6, 0},
+ {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 2, 3, 0},
+ {EC_GF_OP_XOR2, 6, 2, 0}, {EC_GF_OP_XOR2, 1, 6, 0},
+ {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 4, 1, 0},
+ {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_81 = {8,
+ {
+ 2,
+ 7,
+ 4,
+ 1,
+ 5,
+ 6,
+ 3,
+ 0,
+ },
+ ec_gf8_mul_81_ops};
+
+static ec_gf_op_t ec_gf8_mul_82_ops[] = {
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_COPY, 8, 6, 0}, {EC_GF_OP_XOR2, 6, 5, 0},
+ {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 3, 2, 0},
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 2, 7, 0},
+ {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_XOR2, 7, 5, 0},
+ {EC_GF_OP_XOR3, 5, 8, 7}, {EC_GF_OP_XOR2, 7, 4, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_82 = {9,
+ {
+ 6,
+ 2,
+ 7,
+ 5,
+ 1,
+ 3,
+ 4,
+ 0,
+ 8,
+ },
+ ec_gf8_mul_82_ops};
+
+static ec_gf_op_t ec_gf8_mul_83_ops[] = {
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 1, 2, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 0, 3, 0},
+ {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 7, 2, 0},
+ {EC_GF_OP_XOR2, 5, 0, 0}, {EC_GF_OP_XOR2, 3, 6, 0},
+ {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 6, 7, 0},
+ {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 7, 1, 0},
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_83 = {8,
+ {
+ 3,
+ 5,
+ 6,
+ 7,
+ 1,
+ 2,
+ 4,
+ 0,
+ },
+ ec_gf8_mul_83_ops};
+
+static ec_gf_op_t ec_gf8_mul_84_ops[] = {
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 7, 5, 0},
+ {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 6, 2, 0},
+ {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 4, 7, 0},
+ {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 1, 3, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_84 = {8,
+ {
+ 7,
+ 6,
+ 0,
+ 4,
+ 1,
+ 5,
+ 3,
+ 2,
+ },
+ ec_gf8_mul_84_ops};
+
+static ec_gf_op_t ec_gf8_mul_85_ops[] = {
+ {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 7, 5, 0},
+ {EC_GF_OP_XOR2, 6, 2, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_XOR2, 4, 2, 0},
+ {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 3, 4, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 7, 1, 0},
+ {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 1, 3, 0},
+ {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_XOR2, 2, 1, 0},
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_85 = {8,
+ {
+ 7,
+ 6,
+ 0,
+ 3,
+ 2,
+ 4,
+ 5,
+ 1,
+ },
+ ec_gf8_mul_85_ops};
+
+static ec_gf_op_t ec_gf8_mul_86_ops[] = {
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 5, 6, 0},
+ {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 5, 7, 0},
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 2, 6, 0},
+ {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 6, 5, 0},
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_86 = {8,
+ {
+ 1,
+ 2,
+ 6,
+ 4,
+ 5,
+ 7,
+ 3,
+ 0,
+ },
+ ec_gf8_mul_86_ops};
+
+static ec_gf_op_t ec_gf8_mul_87_ops[] = {
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_COPY, 8, 1, 0},
+ {EC_GF_OP_XOR2, 8, 6, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 4, 5, 0},
+ {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_XOR2, 7, 5, 0},
+ {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR3, 5, 8, 0},
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 2, 8, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_87 = {9,
+ {
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 7,
+ 6,
+ 0,
+ 8,
+ },
+ ec_gf8_mul_87_ops};
+
+static ec_gf_op_t ec_gf8_mul_88_ops[] = {
+ {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 6, 7, 0},
+ {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 1, 0, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 1, 7, 0},
+ {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_XOR2, 2, 5, 0},
+ {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 5, 4, 0},
+ {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 3, 6, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_88 = {8,
+ {
+ 6,
+ 7,
+ 3,
+ 1,
+ 2,
+ 4,
+ 5,
+ 0,
+ },
+ ec_gf8_mul_88_ops};
+
+static ec_gf_op_t ec_gf8_mul_89_ops[] = {
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 5, 1, 0},
+ {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_XOR2, 5, 0, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR3, 8, 5, 2},
+ {EC_GF_OP_XOR2, 4, 8, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR2, 0, 8, 0}, {EC_GF_OP_XOR2, 3, 4, 0},
+ {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 5, 7, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_89 = {9,
+ {
+ 2,
+ 1,
+ 6,
+ 5,
+ 7,
+ 3,
+ 4,
+ 0,
+ 8,
+ },
+ ec_gf8_mul_89_ops};
+
+static ec_gf_op_t ec_gf8_mul_8A_ops[] = {
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_XOR2, 3, 6, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 6, 5, 0},
+ {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_XOR2, 4, 7, 0},
+ {EC_GF_OP_XOR2, 6, 2, 0}, {EC_GF_OP_XOR2, 7, 3, 0},
+ {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_8A = {8,
+ {
+ 1,
+ 2,
+ 3,
+ 0,
+ 6,
+ 7,
+ 4,
+ 5,
+ },
+ ec_gf8_mul_8A_ops};
+
+static ec_gf_op_t ec_gf8_mul_8B_ops[] = {
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 3, 6, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 1, 7, 0},
+ {EC_GF_OP_XOR2, 7, 5, 0}, {EC_GF_OP_XOR2, 4, 1, 0},
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 7, 3, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 3, 4, 0},
+ {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 5, 4, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_8B = {8,
+ {
+ 6,
+ 1,
+ 2,
+ 3,
+ 5,
+ 7,
+ 4,
+ 0,
+ },
+ ec_gf8_mul_8B_ops};
+
+static ec_gf_op_t ec_gf8_mul_8C_ops[] = {
+ {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 5, 7, 0},
+ {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 7, 2, 0},
+ {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 7, 0, 0},
+ {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_XOR2, 0, 3, 0},
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_8C = {8,
+ {
+ 1,
+ 2,
+ 0,
+ 7,
+ 3,
+ 4,
+ 5,
+ 6,
+ },
+ ec_gf8_mul_8C_ops};
+
+static ec_gf_op_t ec_gf8_mul_8D_ops[] = {
+ {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 5, 6, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 6, 7, 0},
+ {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 2, 4, 0},
+ {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 4, 0, 0},
+ {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_8D = {8,
+ {
+ 7,
+ 1,
+ 3,
+ 2,
+ 4,
+ 5,
+ 0,
+ 6,
+ },
+ ec_gf8_mul_8D_ops};
+
+static ec_gf_op_t ec_gf8_mul_8E_ops[] = {{EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_XOR2, 3, 0, 0},
+ {EC_GF_OP_XOR2, 4, 0, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_8E = {8,
+ {
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 0,
+ },
+ ec_gf8_mul_8E_ops};
+
+static ec_gf_op_t ec_gf8_mul_8F_ops[] = {
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 3, 0, 0},
+ {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_XOR2, 7, 6, 0},
+ {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 5, 4, 0},
+ {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 3, 2, 0},
+ {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_8F = {8,
+ {
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 0,
+ },
+ ec_gf8_mul_8F_ops};
+
+static ec_gf_op_t ec_gf8_mul_90_ops[] = {
+ {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 7, 2, 0},
+ {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 5, 1, 0},
+ {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 4, 5, 0},
+ {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 4, 2, 0},
+ {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_90 = {8,
+ {
+ 4,
+ 5,
+ 6,
+ 7,
+ 0,
+ 1,
+ 3,
+ 2,
+ },
+ ec_gf8_mul_90_ops};
+
+static ec_gf_op_t ec_gf8_mul_91_ops[] = {
+ {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 3, 4, 0},
+ {EC_GF_OP_COPY, 9, 1, 0}, {EC_GF_OP_COPY, 8, 3, 0},
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 1, 2, 0},
+ {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 7, 1, 0},
+ {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 7, 9, 0},
+ {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 4, 5, 0},
+ {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 0, 3, 0},
+ {EC_GF_OP_XOR3, 5, 8, 0}, {EC_GF_OP_XOR2, 2, 5, 0},
+ {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_91 = {10,
+ {
+ 2,
+ 3,
+ 1,
+ 4,
+ 0,
+ 6,
+ 7,
+ 5,
+ 8,
+ 9,
+ },
+ ec_gf8_mul_91_ops};
+
+static ec_gf_op_t ec_gf8_mul_92_ops[] = {
+ {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 5, 4, 0},
+ {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 3, 1, 0},
+ {EC_GF_OP_XOR2, 4, 2, 0}, {EC_GF_OP_XOR2, 3, 0, 0},
+ {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 3, 7, 0},
+ {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 4, 5, 0},
+ {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_92 = {8,
+ {
+ 6,
+ 7,
+ 0,
+ 1,
+ 2,
+ 3,
+ 5,
+ 4,
+ },
+ ec_gf8_mul_92_ops};
+
+static ec_gf_op_t ec_gf8_mul_93_ops[] = {
+ {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_XOR2, 4, 2, 0},
+ {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 3, 4, 0},
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 6, 0, 0},
+ {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 7, 4, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_93 = {8,
+ {
+ 6,
+ 4,
+ 5,
+ 1,
+ 7,
+ 2,
+ 3,
+ 0,
+ },
+ ec_gf8_mul_93_ops};
+
+static ec_gf_op_t ec_gf8_mul_94_ops[] = {
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 2, 6, 0},
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 3, 7, 0},
+ {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 3, 5, 0},
+ {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 5, 2, 0},
+ {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_94 = {8,
+ {
+ 7,
+ 5,
+ 0,
+ 2,
+ 6,
+ 1,
+ 3,
+ 4,
+ },
+ ec_gf8_mul_94_ops};
+
+static ec_gf_op_t ec_gf8_mul_95_ops[] = {
+ {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 7, 3, 0},
+ {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 0, 3, 0},
+ {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 2, 4, 0},
+ {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 1, 4, 0},
+ {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 6, 2, 0},
+ {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 4, 0, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_95 = {8,
+ {
+ 7,
+ 6,
+ 1,
+ 3,
+ 0,
+ 4,
+ 5,
+ 2,
+ },
+ ec_gf8_mul_95_ops};
+
+static ec_gf_op_t ec_gf8_mul_96_ops[] = {
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 4, 5, 0},
+ {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 6, 7, 0},
+ {EC_GF_OP_XOR3, 8, 0, 4}, {EC_GF_OP_XOR2, 3, 6, 0},
+ {EC_GF_OP_XOR2, 7, 8, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_XOR2, 8, 3, 0}, {EC_GF_OP_XOR2, 3, 2, 0},
+ {EC_GF_OP_XOR2, 1, 8, 0}, {EC_GF_OP_XOR2, 2, 5, 0},
+ {EC_GF_OP_XOR2, 5, 8, 0}, {EC_GF_OP_XOR2, 0, 2, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_96 = {9,
+ {
+ 4,
+ 0,
+ 1,
+ 6,
+ 7,
+ 2,
+ 3,
+ 5,
+ 8,
+ },
+ ec_gf8_mul_96_ops};
+
+static ec_gf_op_t ec_gf8_mul_97_ops[] = {
+ {EC_GF_OP_XOR2, 5, 0, 0}, {EC_GF_OP_COPY, 8, 2, 0},
+ {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 8, 6, 0},
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 3, 7, 0},
+ {EC_GF_OP_XOR2, 1, 8, 0}, {EC_GF_OP_XOR2, 7, 5, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 3, 1, 0},
+ {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 5, 8, 0},
+ {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_97 = {9,
+ {
+ 4,
+ 5,
+ 3,
+ 6,
+ 7,
+ 1,
+ 2,
+ 0,
+ 8,
+ },
+ ec_gf8_mul_97_ops};
+
+static ec_gf_op_t ec_gf8_mul_98_ops[] = {
+ {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 4, 1, 0},
+ {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 4, 7, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 3, 4, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 4, 6, 0},
+ {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 6, 0, 0},
+ {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_98 = {8,
+ {
+ 4,
+ 2,
+ 3,
+ 6,
+ 7,
+ 5,
+ 1,
+ 0,
+ },
+ ec_gf8_mul_98_ops};
+
+static ec_gf_op_t ec_gf8_mul_99_ops[] = {
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 7, 1, 0},
+ {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 7, 2, 0},
+ {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 2, 5, 0},
+ {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 5, 7, 0},
+ {EC_GF_OP_XOR2, 4, 2, 0}, {EC_GF_OP_XOR2, 3, 7, 0},
+ {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_XOR2, 2, 6, 0},
+ {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 7, 2, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_99 = {8,
+ {
+ 6,
+ 5,
+ 3,
+ 7,
+ 0,
+ 1,
+ 4,
+ 2,
+ },
+ ec_gf8_mul_99_ops};
+
+static ec_gf_op_t ec_gf8_mul_9A_ops[] = {
+ {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 6, 4, 0},
+ {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 3, 2, 0},
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_XOR3, 8, 4, 0}, {EC_GF_OP_XOR2, 0, 6, 0},
+ {EC_GF_OP_XOR2, 7, 8, 0}, {EC_GF_OP_XOR2, 1, 8, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_9A = {9,
+ {
+ 6,
+ 3,
+ 4,
+ 0,
+ 5,
+ 1,
+ 2,
+ 7,
+ 8,
+ },
+ ec_gf8_mul_9A_ops};
+
+static ec_gf_op_t ec_gf8_mul_9B_ops[] = {
+ {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_XOR2, 7, 2, 0},
+ {EC_GF_OP_COPY, 9, 5, 0}, {EC_GF_OP_XOR2, 7, 0, 0},
+ {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 4, 6, 0},
+ {EC_GF_OP_XOR3, 8, 3, 2}, {EC_GF_OP_XOR2, 1, 3, 0},
+ {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 3, 9, 0},
+ {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_9B = {10,
+ {
+ 4,
+ 5,
+ 8,
+ 6,
+ 7,
+ 1,
+ 2,
+ 0,
+ 3,
+ 9,
+ },
+ ec_gf8_mul_9B_ops};
+
+static ec_gf_op_t ec_gf8_mul_9C_ops[] = {
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 3, 6, 0},
+ {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 4, 7, 0},
+ {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 3, 1, 0},
+ {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_XOR2, 6, 2, 0}, {EC_GF_OP_XOR2, 0, 2, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_9C = {8,
+ {
+ 3,
+ 2,
+ 1,
+ 0,
+ 4,
+ 5,
+ 6,
+ 7,
+ },
+ ec_gf8_mul_9C_ops};
+
+static ec_gf_op_t ec_gf8_mul_9D_ops[] = {
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 4, 1, 0},
+ {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 5, 2, 0},
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 4, 7, 0},
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 3, 5, 0},
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_9D = {8,
+ {
+ 0,
+ 1,
+ 2,
+ 3,
+ 7,
+ 4,
+ 5,
+ 6,
+ },
+ ec_gf8_mul_9D_ops};
+
+static ec_gf_op_t ec_gf8_mul_9E_ops[] = {
+ {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_COPY, 8, 7, 0},
+ {EC_GF_OP_XOR2, 8, 5, 0}, {EC_GF_OP_XOR2, 5, 2, 0},
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 6, 0, 0},
+ {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 3, 6, 0},
+ {EC_GF_OP_XOR2, 0, 8, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_9E = {9,
+ {
+ 4,
+ 5,
+ 3,
+ 8,
+ 6,
+ 0,
+ 2,
+ 7,
+ 1,
+ },
+ ec_gf8_mul_9E_ops};
+
+static ec_gf_op_t ec_gf8_mul_9F_ops[] = {
+ {EC_GF_OP_XOR3, 8, 1, 2}, {EC_GF_OP_XOR2, 8, 3, 0},
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 4, 0, 0},
+ {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 1, 7, 0},
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 7, 5, 0},
+ {EC_GF_OP_XOR2, 6, 8, 0}, {EC_GF_OP_XOR2, 5, 8, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_9F = {9,
+ {
+ 4,
+ 5,
+ 6,
+ 7,
+ 0,
+ 1,
+ 2,
+ 3,
+ 8,
+ },
+ ec_gf8_mul_9F_ops};
+
+static ec_gf_op_t ec_gf8_mul_A0_ops[] = {
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 5, 6, 0},
+ {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 1, 2, 0},
+ {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 2, 3, 0},
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 7, 2, 0},
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_A0 = {8,
+ {
+ 3,
+ 1,
+ 6,
+ 7,
+ 5,
+ 2,
+ 4,
+ 0,
+ },
+ ec_gf8_mul_A0_ops};
+
+static ec_gf_op_t ec_gf8_mul_A1_ops[] = {
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 1, 7, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 5, 1, 0},
+ {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 6, 5, 0},
+ {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 0, 2, 0},
+ {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR3, 8, 0, 6},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 7, 8, 0},
+ {EC_GF_OP_XOR2, 3, 8, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_A1 = {9,
+ {
+ 7,
+ 4,
+ 1,
+ 5,
+ 6,
+ 0,
+ 2,
+ 3,
+ 8,
+ },
+ ec_gf8_mul_A1_ops};
+
+static ec_gf_op_t ec_gf8_mul_A2_ops[] = {
+ {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 2, 4, 0},
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 1, 6, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 3, 1, 0},
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 7, 3, 0},
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 4, 7, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_A2 = {8,
+ {
+ 7,
+ 0,
+ 6,
+ 3,
+ 2,
+ 1,
+ 4,
+ 5,
+ },
+ ec_gf8_mul_A2_ops};
+
+static ec_gf_op_t ec_gf8_mul_A3_ops[] = {
+ {EC_GF_OP_COPY, 8, 2, 0}, {EC_GF_OP_XOR2, 2, 6, 0},
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 4, 0, 0},
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 5, 4, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 3, 8, 0},
+ {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_XOR2, 1, 3, 0},
+ {EC_GF_OP_XOR2, 7, 5, 0}, {EC_GF_OP_XOR2, 3, 0, 0},
+ {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_A3 = {9,
+ {
+ 3,
+ 7,
+ 2,
+ 6,
+ 1,
+ 4,
+ 0,
+ 5,
+ 8,
+ },
+ ec_gf8_mul_A3_ops};
+
+static ec_gf_op_t ec_gf8_mul_A4_ops[] = {
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 4, 2, 0},
+ {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 7, 0, 0},
+ {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 6, 4, 0},
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 4, 7, 0},
+ {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 1, 4, 0},
+ {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 0, 3, 0},
+ {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_A4 = {8,
+ {
+ 5,
+ 6,
+ 7,
+ 2,
+ 4,
+ 3,
+ 0,
+ 1,
+ },
+ ec_gf8_mul_A4_ops};
+
+static ec_gf_op_t ec_gf8_mul_A5_ops[] = {
+ {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 3, 0, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 5, 2, 0},
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 1, 3, 0},
+ {EC_GF_OP_XOR3, 8, 5, 6}, {EC_GF_OP_XOR2, 2, 7, 0},
+ {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 3, 7, 0},
+ {EC_GF_OP_XOR2, 4, 8, 0}, {EC_GF_OP_XOR2, 7, 8, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_A5 = {9,
+ {
+ 1,
+ 4,
+ 2,
+ 5,
+ 6,
+ 7,
+ 3,
+ 0,
+ 8,
+ },
+ ec_gf8_mul_A5_ops};
+
+static ec_gf_op_t ec_gf8_mul_A6_ops[] = {
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 4, 6, 0},
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 2, 4, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 7, 2, 0},
+ {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 5, 7, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 4, 1, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_A6 = {8,
+ {
+ 1,
+ 2,
+ 0,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ },
+ ec_gf8_mul_A6_ops};
+
+static ec_gf_op_t ec_gf8_mul_A7_ops[] = {
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 3, 1, 0},
+ {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 4, 2, 0},
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 4, 6, 0},
+ {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 7, 4, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_A7 = {8,
+ {
+ 0,
+ 1,
+ 2,
+ 5,
+ 6,
+ 7,
+ 3,
+ 4,
+ },
+ ec_gf8_mul_A7_ops};
+
+static ec_gf_op_t ec_gf8_mul_A8_ops[] = {
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_COPY, 8, 0, 0}, {EC_GF_OP_XOR2, 8, 1, 0},
+ {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_COPY, 9, 4, 0},
+ {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_XOR2, 4, 1, 0},
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 8, 3, 0},
+ {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 3, 2, 0},
+ {EC_GF_OP_XOR2, 2, 9, 0}, {EC_GF_OP_XOR2, 3, 0, 0},
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 6, 2, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_A8 = {10,
+ {
+ 1,
+ 7,
+ 5,
+ 8,
+ 6,
+ 3,
+ 4,
+ 0,
+ 2,
+ 9,
+ },
+ ec_gf8_mul_A8_ops};
+
+static ec_gf_op_t ec_gf8_mul_A9_ops[] = {
+ {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 1, 0, 0},
+ {EC_GF_OP_XOR2, 5, 0, 0}, {EC_GF_OP_XOR2, 0, 3, 0},
+ {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_XOR2, 5, 2, 0},
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 2, 6, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 1, 7, 0},
+ {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 7, 4, 0},
+ {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_A9 = {8,
+ {
+ 3,
+ 7,
+ 6,
+ 1,
+ 2,
+ 0,
+ 4,
+ 5,
+ },
+ ec_gf8_mul_A9_ops};
+
+static ec_gf_op_t ec_gf8_mul_AA_ops[] = {
+ {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 3, 0, 0},
+ {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 2, 5, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 3, 1, 0},
+ {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_XOR2, 1, 4, 0},
+ {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 4, 2, 0},
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_AA = {8,
+ {
+ 0,
+ 4,
+ 5,
+ 3,
+ 6,
+ 7,
+ 1,
+ 2,
+ },
+ ec_gf8_mul_AA_ops};
+
+static ec_gf_op_t ec_gf8_mul_AB_ops[] = {
+ {EC_GF_OP_COPY, 8, 0, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_COPY, 9, 6, 0}, {EC_GF_OP_XOR2, 6, 2, 0},
+ {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 7, 4, 0},
+ {EC_GF_OP_XOR2, 8, 7, 0}, {EC_GF_OP_XOR2, 3, 8, 0},
+ {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR3, 3, 9, 7},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_AB = {10,
+ {
+ 2,
+ 3,
+ 8,
+ 0,
+ 5,
+ 6,
+ 1,
+ 4,
+ 7,
+ 9,
+ },
+ ec_gf8_mul_AB_ops};
+
+static ec_gf_op_t ec_gf8_mul_AC_ops[] = {
+ {EC_GF_OP_XOR2, 5, 0, 0}, {EC_GF_OP_XOR2, 0, 2, 0},
+ {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 4, 7, 0},
+ {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 0, 3, 0},
+ {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 3, 1, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_AC = {8,
+ {
+ 3,
+ 2,
+ 1,
+ 0,
+ 4,
+ 5,
+ 6,
+ 7,
+ },
+ ec_gf8_mul_AC_ops};
+
+static ec_gf_op_t ec_gf8_mul_AD_ops[] = {
+ {EC_GF_OP_XOR3, 8, 1, 2}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 4, 8, 0},
+ {EC_GF_OP_XOR2, 5, 8, 0}, {EC_GF_OP_XOR2, 6, 2, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_AD = {9,
+ {
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 0,
+ 1,
+ 2,
+ 8,
+ },
+ ec_gf8_mul_AD_ops};
+
+static ec_gf_op_t ec_gf8_mul_AE_ops[] = {
+ {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_COPY, 8, 5, 0}, {EC_GF_OP_XOR2, 5, 7, 0},
+ {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 0, 2, 0},
+ {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_XOR2, 7, 3, 0},
+ {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 2, 6, 0},
+ {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 4, 8, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_AE = {9,
+ {
+ 7,
+ 0,
+ 5,
+ 6,
+ 3,
+ 4,
+ 1,
+ 2,
+ 8,
+ },
+ ec_gf8_mul_AE_ops};
+
+static ec_gf_op_t ec_gf8_mul_AF_ops[] = {
+ {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 6, 0, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 7, 6, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 6, 2, 0},
+ {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 1, 4, 0},
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 0, 3, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_AF = {8,
+ {
+ 0,
+ 1,
+ 2,
+ 7,
+ 3,
+ 4,
+ 5,
+ 6,
+ },
+ ec_gf8_mul_AF_ops};
+
+static ec_gf_op_t ec_gf8_mul_B0_ops[] = {
+ {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 3, 6, 0},
+ {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 1, 0, 0},
+ {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 6, 2, 0},
+ {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 1, 7, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 3, 1, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_B0 = {8,
+ {
+ 4,
+ 0,
+ 7,
+ 2,
+ 3,
+ 1,
+ 6,
+ 5,
+ },
+ ec_gf8_mul_B0_ops};
+
+static ec_gf_op_t ec_gf8_mul_B1_ops[] = {
+ {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_COPY, 8, 4, 0},
+ {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_XOR2, 4, 2, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 6, 4, 0},
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 2, 5, 0},
+ {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 7, 6, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 6, 5, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_XOR3, 5, 8, 1}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_B1 = {9,
+ {
+ 2,
+ 6,
+ 4,
+ 7,
+ 0,
+ 1,
+ 3,
+ 5,
+ 8,
+ },
+ ec_gf8_mul_B1_ops};
+
+static ec_gf_op_t ec_gf8_mul_B2_ops[] = {
+ {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 2, 1, 0},
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 6, 0, 0},
+ {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_XOR3, 8, 4, 5},
+ {EC_GF_OP_XOR2, 2, 8, 0}, {EC_GF_OP_XOR2, 8, 1, 0},
+ {EC_GF_OP_XOR2, 7, 8, 0}, {EC_GF_OP_XOR2, 3, 8, 0},
+ {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_B2 = {9,
+ {
+ 0,
+ 7,
+ 4,
+ 5,
+ 6,
+ 1,
+ 2,
+ 3,
+ 8,
+ },
+ ec_gf8_mul_B2_ops};
+
+static ec_gf_op_t ec_gf8_mul_B3_ops[] = {
+ {EC_GF_OP_XOR2, 5, 0, 0}, {EC_GF_OP_COPY, 9, 5, 0},
+ {EC_GF_OP_XOR2, 4, 2, 0}, {EC_GF_OP_XOR3, 8, 6, 4},
+ {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 8, 5, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 7, 8, 0},
+ {EC_GF_OP_XOR2, 0, 8, 0}, {EC_GF_OP_XOR2, 1, 7, 0},
+ {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 5, 2, 0},
+ {EC_GF_OP_XOR3, 1, 9, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_B3 = {10,
+ {
+ 2,
+ 3,
+ 4,
+ 5,
+ 1,
+ 6,
+ 0,
+ 7,
+ 8,
+ 9,
+ },
+ ec_gf8_mul_B3_ops};
+
+static ec_gf_op_t ec_gf8_mul_B4_ops[] = {
+ {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 3, 2, 0},
+ {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 5, 4, 0},
+ {EC_GF_OP_XOR2, 4, 2, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 6, 5, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_B4 = {8,
+ {
+ 5,
+ 6,
+ 7,
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ },
+ ec_gf8_mul_B4_ops};
+
+static ec_gf_op_t ec_gf8_mul_B5_ops[] = {
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 0, 2, 0},
+ {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 6, 0, 0},
+ {EC_GF_OP_COPY, 8, 6, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 4, 2, 0},
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 7, 4, 0},
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_XOR3, 4, 8, 3}, {EC_GF_OP_XOR2, 0, 6, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_B5 = {9,
+ {
+ 3,
+ 4,
+ 0,
+ 7,
+ 1,
+ 5,
+ 6,
+ 2,
+ 8,
+ },
+ ec_gf8_mul_B5_ops};
+
+static ec_gf_op_t ec_gf8_mul_B6_ops[] = {
+ {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 7, 4, 0},
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 5, 7, 0},
+ {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_XOR2, 7, 0, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 2, 6, 0},
+ {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 3, 2, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_B6 = {8,
+ {
+ 5,
+ 3,
+ 6,
+ 4,
+ 7,
+ 0,
+ 1,
+ 2,
+ },
+ ec_gf8_mul_B6_ops};
+
+static ec_gf_op_t ec_gf8_mul_B7_ops[] = {
+ {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 0, 2, 0},
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 3, 7, 0},
+ {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_XOR2, 2, 3, 0},
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 7, 5, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_B7 = {8,
+ {
+ 5,
+ 0,
+ 1,
+ 4,
+ 2,
+ 6,
+ 7,
+ 3,
+ },
+ ec_gf8_mul_B7_ops};
+
+static ec_gf_op_t ec_gf8_mul_B8_ops[] = {
+ {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 7, 2, 0},
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 3, 2, 0},
+ {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 0, 6, 0},
+ {EC_GF_OP_XOR2, 4, 7, 0}, {EC_GF_OP_XOR2, 5, 1, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 7, 5, 0},
+ {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_B8 = {8,
+ {
+ 6,
+ 4,
+ 5,
+ 1,
+ 2,
+ 0,
+ 7,
+ 3,
+ },
+ ec_gf8_mul_B8_ops};
+
+static ec_gf_op_t ec_gf8_mul_B9_ops[] = {
+ {EC_GF_OP_COPY, 8, 0, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 3, 0, 0},
+ {EC_GF_OP_XOR3, 0, 8, 2}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 7, 0, 0},
+ {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 3, 7, 0},
+ {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_B9 = {9,
+ {
+ 6,
+ 7,
+ 0,
+ 2,
+ 1,
+ 4,
+ 5,
+ 3,
+ 8,
+ },
+ ec_gf8_mul_B9_ops};
+
+static ec_gf_op_t ec_gf8_mul_BA_ops[] = {
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 5, 7, 0},
+ {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 3, 2, 0},
+ {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 6, 5, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 7, 6, 0},
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 6, 0, 0},
+ {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_BA = {8,
+ {
+ 1,
+ 2,
+ 4,
+ 3,
+ 5,
+ 6,
+ 0,
+ 7,
+ },
+ ec_gf8_mul_BA_ops};
+
+static ec_gf_op_t ec_gf8_mul_BB_ops[] = {
+ {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_COPY, 8, 3, 0},
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 3, 1, 0},
+ {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 8, 5, 0},
+ {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 5, 4, 0},
+ {EC_GF_OP_XOR2, 8, 7, 0}, {EC_GF_OP_XOR2, 2, 8, 0},
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 1, 2, 0},
+ {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_XOR2, 4, 0, 0},
+ {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_BB = {9,
+ {
+ 7,
+ 2,
+ 1,
+ 8,
+ 3,
+ 5,
+ 6,
+ 4,
+ 0,
+ },
+ ec_gf8_mul_BB_ops};
+
+static ec_gf_op_t ec_gf8_mul_BC_ops[] = {
+ {EC_GF_OP_COPY, 8, 1, 0}, {EC_GF_OP_XOR2, 8, 2, 0},
+ {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_XOR2, 2, 3, 0},
+ {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 0, 2, 0},
+ {EC_GF_OP_XOR2, 4, 2, 0}, {EC_GF_OP_XOR2, 1, 6, 0},
+ {EC_GF_OP_XOR2, 7, 8, 0}, {EC_GF_OP_XOR3, 2, 8, 4},
+ {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 4, 5, 0},
+ {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_BC = {9,
+ {
+ 2,
+ 6,
+ 3,
+ 4,
+ 5,
+ 1,
+ 7,
+ 0,
+ 8,
+ },
+ ec_gf8_mul_BC_ops};
+
+static ec_gf_op_t ec_gf8_mul_BD_ops[] = {
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 3, 1, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 7, 1, 0},
+ {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 3, 7, 0},
+ {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 7, 0, 0},
+ {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_XOR2, 1, 2, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_BD = {8,
+ {
+ 4,
+ 5,
+ 0,
+ 2,
+ 7,
+ 1,
+ 6,
+ 3,
+ },
+ ec_gf8_mul_BD_ops};
+
+static ec_gf_op_t ec_gf8_mul_BE_ops[] = {
+ {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 0, 6, 0},
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 3, 4, 0},
+ {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 3, 2, 0},
+ {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_BE = {8,
+ {
+ 0,
+ 6,
+ 7,
+ 4,
+ 5,
+ 1,
+ 3,
+ 2,
+ },
+ ec_gf8_mul_BE_ops};
+
+static ec_gf_op_t ec_gf8_mul_BF_ops[] = {
+ {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 6, 7, 0},
+ {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 4, 5, 0},
+ {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 1, 2, 0},
+ {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 2, 5, 0},
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_BF = {8,
+ {
+ 5,
+ 6,
+ 1,
+ 7,
+ 3,
+ 0,
+ 2,
+ 4,
+ },
+ ec_gf8_mul_BF_ops};
+
+static ec_gf_op_t ec_gf8_mul_C0_ops[] = {
+ {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 4, 6, 0},
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 2, 6, 0},
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 6, 0, 0},
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 3, 7, 0},
+ {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_C0 = {8,
+ {
+ 1,
+ 2,
+ 3,
+ 4,
+ 7,
+ 5,
+ 6,
+ 0,
+ },
+ ec_gf8_mul_C0_ops};
+
+static ec_gf_op_t ec_gf8_mul_C1_ops[] = {
+ {EC_GF_OP_XOR3, 8, 1, 2}, {EC_GF_OP_XOR2, 8, 3, 0},
+ {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 3, 0, 0},
+ {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 1, 7, 0},
+ {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 7, 0, 0},
+ {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 7, 5, 0},
+ {EC_GF_OP_XOR2, 6, 8, 0}, {EC_GF_OP_XOR2, 5, 8, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_C1 = {9,
+ {
+ 5,
+ 6,
+ 7,
+ 4,
+ 1,
+ 2,
+ 3,
+ 0,
+ 8,
+ },
+ ec_gf8_mul_C1_ops};
+
+static ec_gf_op_t ec_gf8_mul_C2_ops[] = {
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 1, 3, 0},
+ {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 1, 4, 0},
+ {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_XOR2, 4, 2, 0},
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 4, 5, 0},
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 7, 1, 0},
+ {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 2, 7, 0},
+ {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 0, 2, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_C2 = {8,
+ {
+ 7,
+ 6,
+ 3,
+ 0,
+ 1,
+ 4,
+ 5,
+ 2,
+ },
+ ec_gf8_mul_C2_ops};
+
+static ec_gf_op_t ec_gf8_mul_C3_ops[] = {
+ {EC_GF_OP_COPY, 8, 0, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 6, 0, 0},
+ {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 7, 0, 0},
+ {EC_GF_OP_XOR3, 0, 2, 6}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR3, 9, 1, 0}, {EC_GF_OP_XOR2, 1, 3, 0},
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 5, 7, 0},
+ {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 7, 9, 0},
+ {EC_GF_OP_XOR2, 3, 8, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_C3 = {10,
+ {
+ 5,
+ 6,
+ 4,
+ 7,
+ 1,
+ 2,
+ 3,
+ 0,
+ 8,
+ 9,
+ },
+ ec_gf8_mul_C3_ops};
+
+static ec_gf_op_t ec_gf8_mul_C4_ops[] = {
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 2, 3, 0},
+ {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 4, 5, 0},
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 1, 0, 0},
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 1, 4, 0},
+ {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 0, 3, 0},
+ {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 1, 2, 0},
+ {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_C4 = {8,
+ {
+ 0,
+ 2,
+ 1,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ },
+ ec_gf8_mul_C4_ops};
+
+static ec_gf_op_t ec_gf8_mul_C5_ops[] = {
+ {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 0, 3, 0},
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 6, 2, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 5, 6, 0},
+ {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 2, 3, 0},
+ {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 3, 6, 0},
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_C5 = {8,
+ {
+ 4,
+ 3,
+ 5,
+ 7,
+ 6,
+ 2,
+ 0,
+ 1,
+ },
+ ec_gf8_mul_C5_ops};
+
+static ec_gf_op_t ec_gf8_mul_C6_ops[] = {
+ {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_COPY, 8, 4, 0},
+ {EC_GF_OP_XOR2, 4, 2, 0}, {EC_GF_OP_XOR3, 9, 5, 4},
+ {EC_GF_OP_XOR2, 6, 9, 0}, {EC_GF_OP_XOR2, 0, 6, 0},
+ {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_XOR2, 7, 9, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 5, 6, 0},
+ {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 6, 8, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_C6 = {10,
+ {
+ 6,
+ 3,
+ 0,
+ 4,
+ 5,
+ 7,
+ 2,
+ 1,
+ 8,
+ 9,
+ },
+ ec_gf8_mul_C6_ops};
+
+static ec_gf_op_t ec_gf8_mul_C7_ops[] = {
+ {EC_GF_OP_XOR2, 5, 0, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 4, 5, 0},
+ {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 6, 4, 0},
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 1, 6, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 7, 1, 0},
+ {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_C7 = {8,
+ {
+ 7,
+ 0,
+ 6,
+ 2,
+ 5,
+ 3,
+ 4,
+ 1,
+ },
+ ec_gf8_mul_C7_ops};
+
+static ec_gf_op_t ec_gf8_mul_C8_ops[] = {
+ {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 6, 4, 0},
+ {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 4, 1, 0},
+ {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 1, 2, 0},
+ {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 4, 5, 0},
+ {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_C8 = {8,
+ {
+ 1,
+ 3,
+ 2,
+ 4,
+ 6,
+ 7,
+ 5,
+ 0,
+ },
+ ec_gf8_mul_C8_ops};
+
+static ec_gf_op_t ec_gf8_mul_C9_ops[] = {
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 3, 0, 0},
+ {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_XOR2, 7, 6, 0},
+ {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 6, 5, 0},
+ {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 2, 1, 0},
+ {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 3, 2, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_C9 = {8,
+ {
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 0,
+ 1,
+ },
+ ec_gf8_mul_C9_ops};
+
+static ec_gf_op_t ec_gf8_mul_CA_ops[] = {
+ {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 7, 2, 0},
+ {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 2, 3, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 3, 4, 0},
+ {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 6, 0, 0},
+ {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 7, 6, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_CA = {8,
+ {
+ 1,
+ 2,
+ 5,
+ 7,
+ 3,
+ 4,
+ 0,
+ 6,
+ },
+ ec_gf8_mul_CA_ops};
+
+static ec_gf_op_t ec_gf8_mul_CB_ops[] = {
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 2, 1, 0},
+ {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 2, 7, 0},
+ {EC_GF_OP_XOR2, 4, 2, 0}, {EC_GF_OP_XOR2, 7, 5, 0},
+ {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 7, 6, 0},
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_CB = {8,
+ {
+ 2,
+ 3,
+ 4,
+ 5,
+ 7,
+ 6,
+ 0,
+ 1,
+ },
+ ec_gf8_mul_CB_ops};
+
+static ec_gf_op_t ec_gf8_mul_CC_ops[] = {
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 4, 7, 0},
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 7, 3, 0},
+ {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 3, 5, 0},
+ {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 2, 6, 0},
+ {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 1, 3, 0},
+ {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_CC = {8,
+ {
+ 2,
+ 7,
+ 1,
+ 0,
+ 5,
+ 6,
+ 3,
+ 4,
+ },
+ ec_gf8_mul_CC_ops};
+
+static ec_gf_op_t ec_gf8_mul_CD_ops[] = {
+ {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 3, 6, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 6, 2, 0},
+ {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 2, 5, 0},
+ {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_XOR2, 4, 7, 0}, {EC_GF_OP_XOR2, 0, 6, 0},
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 6, 4, 0},
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_CD = {8,
+ {
+ 0,
+ 6,
+ 1,
+ 2,
+ 7,
+ 3,
+ 4,
+ 5,
+ },
+ ec_gf8_mul_CD_ops};
+
+static ec_gf_op_t ec_gf8_mul_CE_ops[] = {
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 3, 5, 0},
+ {EC_GF_OP_XOR2, 5, 0, 0}, {EC_GF_OP_XOR2, 7, 5, 0},
+ {EC_GF_OP_COPY, 8, 7, 0}, {EC_GF_OP_XOR2, 7, 3, 0},
+ {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR3, 3, 6, 8},
+ {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR3, 8, 2, 3},
+ {EC_GF_OP_XOR2, 1, 8, 0}, {EC_GF_OP_XOR2, 4, 8, 0},
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_CE = {9,
+ {
+ 5,
+ 7,
+ 3,
+ 0,
+ 2,
+ 6,
+ 4,
+ 1,
+ 8,
+ },
+ ec_gf8_mul_CE_ops};
+
+static ec_gf_op_t ec_gf8_mul_CF_ops[] = {
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 2, 5, 0},
+ {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 7, 0, 0},
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 6, 7, 0},
+ {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 3, 6, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_CF = {8,
+ {
+ 3,
+ 6,
+ 7,
+ 0,
+ 2,
+ 4,
+ 5,
+ 1,
+ },
+ ec_gf8_mul_CF_ops};
+
+static ec_gf_op_t ec_gf8_mul_D0_ops[] = {
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 0, 3, 0},
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 1, 6, 0},
+ {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 7, 1, 0},
+ {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 2, 7, 0},
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 3, 2, 0},
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_D0 = {8,
+ {
+ 5,
+ 6,
+ 7,
+ 2,
+ 0,
+ 3,
+ 1,
+ 4,
+ },
+ ec_gf8_mul_D0_ops};
+
+static ec_gf_op_t ec_gf8_mul_D1_ops[] = {
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 4, 6, 0},
+ {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_XOR2, 0, 2, 0},
+ {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR3, 8, 6, 0},
+ {EC_GF_OP_XOR2, 4, 8, 0}, {EC_GF_OP_XOR2, 1, 8, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_D1 = {9,
+ {
+ 5,
+ 6,
+ 3,
+ 2,
+ 0,
+ 7,
+ 4,
+ 1,
+ 8,
+ },
+ ec_gf8_mul_D1_ops};
+
+static ec_gf_op_t ec_gf8_mul_D2_ops[] = {
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 3, 6, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 3, 0, 0},
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 3, 1, 0},
+ {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 6, 7, 0},
+ {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 4, 6, 0},
+ {EC_GF_OP_XOR2, 7, 5, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_D2 = {8,
+ {
+ 7,
+ 0,
+ 2,
+ 1,
+ 3,
+ 4,
+ 6,
+ 5,
+ },
+ ec_gf8_mul_D2_ops};
+
+static ec_gf_op_t ec_gf8_mul_D3_ops[] = {
+ {EC_GF_OP_XOR2, 4, 7, 0}, {EC_GF_OP_COPY, 8, 4, 0},
+ {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 4, 2, 0},
+ {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 6, 5, 0},
+ {EC_GF_OP_XOR2, 8, 6, 0}, {EC_GF_OP_XOR2, 3, 8, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 3, 0, 0},
+ {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_XOR2, 4, 7, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_D3 = {9,
+ {
+ 0,
+ 3,
+ 2,
+ 8,
+ 4,
+ 6,
+ 7,
+ 1,
+ 5,
+ },
+ ec_gf8_mul_D3_ops};
+
+static ec_gf_op_t ec_gf8_mul_D4_ops[] = {
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_COPY, 8, 1, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR3, 1, 7, 8},
+ {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 3, 4, 0},
+ {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 2, 3, 0},
+ {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 3, 1, 0},
+ {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_D4 = {9,
+ {
+ 4,
+ 1,
+ 7,
+ 5,
+ 0,
+ 6,
+ 3,
+ 2,
+ 8,
+ },
+ ec_gf8_mul_D4_ops};
+
+static ec_gf_op_t ec_gf8_mul_D5_ops[] = {
+ {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 2, 1, 0},
+ {EC_GF_OP_XOR2, 6, 2, 0}, {EC_GF_OP_XOR2, 0, 6, 0},
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 7, 3, 0},
+ {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_D5 = {8,
+ {
+ 6,
+ 7,
+ 4,
+ 5,
+ 2,
+ 3,
+ 1,
+ 0,
+ },
+ ec_gf8_mul_D5_ops};
+
+static ec_gf_op_t ec_gf8_mul_D6_ops[] = {
+ {EC_GF_OP_COPY, 8, 0, 0}, {EC_GF_OP_XOR2, 0, 2, 0},
+ {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 2, 4, 0},
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 3, 2, 0},
+ {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 7, 2, 0},
+ {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 4, 7, 0},
+ {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 7, 8, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_D6 = {9,
+ {
+ 0,
+ 6,
+ 2,
+ 7,
+ 1,
+ 3,
+ 4,
+ 5,
+ 8,
+ },
+ ec_gf8_mul_D6_ops};
+
+static ec_gf_op_t ec_gf8_mul_D7_ops[] = {
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 5, 7, 0},
+ {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_XOR3, 8, 3, 5}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 0, 8, 0}, {EC_GF_OP_XOR2, 6, 0, 0},
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 1, 6, 0},
+ {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 3, 6, 0},
+ {EC_GF_OP_XOR3, 6, 7, 8}, {EC_GF_OP_XOR2, 7, 2, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_D7 = {9,
+ {
+ 3,
+ 4,
+ 6,
+ 5,
+ 0,
+ 7,
+ 1,
+ 2,
+ 8,
+ },
+ ec_gf8_mul_D7_ops};
+
+static ec_gf_op_t ec_gf8_mul_D8_ops[] = {
+ {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 4, 1, 0},
+ {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 4, 2, 0},
+ {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 3, 2, 0},
+ {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_D8 = {8,
+ {
+ 4,
+ 5,
+ 6,
+ 7,
+ 0,
+ 1,
+ 2,
+ 3,
+ },
+ ec_gf8_mul_D8_ops};
+
+static ec_gf_op_t ec_gf8_mul_D9_ops[] = {
+ {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 1, 2, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 7, 0, 0},
+ {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 2, 3, 0},
+ {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 3, 7, 0},
+ {EC_GF_OP_XOR2, 6, 2, 0}, {EC_GF_OP_XOR2, 2, 5, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_D9 = {8,
+ {
+ 1,
+ 2,
+ 6,
+ 7,
+ 4,
+ 5,
+ 0,
+ 3,
+ },
+ ec_gf8_mul_D9_ops};
+
+static ec_gf_op_t ec_gf8_mul_DA_ops[] = {
+ {EC_GF_OP_XOR2, 6, 2, 0}, {EC_GF_OP_XOR2, 2, 7, 0},
+ {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR3, 8, 2, 0}, {EC_GF_OP_XOR2, 7, 6, 0},
+ {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 1, 8, 0},
+ {EC_GF_OP_XOR2, 5, 8, 0}, {EC_GF_OP_XOR2, 2, 4, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 3, 5, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_DA = {9,
+ {
+ 2,
+ 5,
+ 7,
+ 1,
+ 0,
+ 4,
+ 3,
+ 6,
+ 8,
+ },
+ ec_gf8_mul_DA_ops};
+
+static ec_gf_op_t ec_gf8_mul_DB_ops[] = {
+ {EC_GF_OP_COPY, 8, 0, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 5, 2, 0},
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 8, 4, 0},
+ {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 4, 2, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 7, 4, 0},
+ {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 1, 6, 0},
+ {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 3, 8, 0},
+ {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_DB = {9,
+ {
+ 7,
+ 5,
+ 6,
+ 2,
+ 3,
+ 4,
+ 1,
+ 0,
+ 8,
+ },
+ ec_gf8_mul_DB_ops};
+
+static ec_gf_op_t ec_gf8_mul_DC_ops[] = {
+ {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 3, 0, 0},
+ {EC_GF_OP_XOR2, 4, 2, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 4, 6, 0},
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 3, 5, 0},
+ {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 7, 6, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_DC = {8,
+ {
+ 4,
+ 5,
+ 2,
+ 6,
+ 7,
+ 1,
+ 0,
+ 3,
+ },
+ ec_gf8_mul_DC_ops};
+
+static ec_gf_op_t ec_gf8_mul_DD_ops[] = {
+ {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 3, 0, 0},
+ {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_XOR2, 4, 2, 0}, {EC_GF_OP_XOR2, 6, 0, 0},
+ {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_XOR2, 4, 6, 0},
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 1, 4, 0},
+ {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_DD = {8,
+ {
+ 1,
+ 2,
+ 3,
+ 6,
+ 7,
+ 0,
+ 4,
+ 5,
+ },
+ ec_gf8_mul_DD_ops};
+
+static ec_gf_op_t ec_gf8_mul_DE_ops[] = {
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 3, 7, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 6, 2, 0},
+ {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 7, 6, 0},
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 1, 3, 0},
+ {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_XOR2, 4, 5, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 3, 4, 0},
+ {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_DE = {8,
+ {
+ 0,
+ 5,
+ 2,
+ 6,
+ 7,
+ 1,
+ 3,
+ 4,
+ },
+ ec_gf8_mul_DE_ops};
+
+static ec_gf_op_t ec_gf8_mul_DF_ops[] = {
+ {EC_GF_OP_COPY, 8, 0, 0}, {EC_GF_OP_XOR2, 0, 2, 0},
+ {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 8, 3, 0},
+ {EC_GF_OP_COPY, 9, 0, 0}, {EC_GF_OP_XOR2, 0, 6, 0},
+ {EC_GF_OP_XOR2, 8, 7, 0}, {EC_GF_OP_XOR2, 3, 0, 0},
+ {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_XOR2, 4, 7, 0}, {EC_GF_OP_XOR2, 5, 1, 0},
+ {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 5, 8, 0},
+ {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 6, 5, 0},
+ {EC_GF_OP_XOR3, 1, 9, 2}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_DF = {10,
+ {
+ 7,
+ 2,
+ 8,
+ 4,
+ 3,
+ 1,
+ 0,
+ 6,
+ 5,
+ 9,
+ },
+ ec_gf8_mul_DF_ops};
+
+static ec_gf_op_t ec_gf8_mul_E0_ops[] = {
+ {EC_GF_OP_XOR2, 4, 2, 0}, {EC_GF_OP_XOR2, 6, 4, 0},
+ {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 3, 6, 0},
+ {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 7, 1, 0},
+ {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 6, 0, 0},
+ {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_E0 = {8,
+ {
+ 2,
+ 3,
+ 4,
+ 7,
+ 5,
+ 6,
+ 0,
+ 1,
+ },
+ ec_gf8_mul_E0_ops};
+
+static ec_gf_op_t ec_gf8_mul_E1_ops[] = {
+ {EC_GF_OP_COPY, 8, 1, 0}, {EC_GF_OP_XOR2, 8, 7, 0},
+ {EC_GF_OP_XOR2, 3, 8, 0}, {EC_GF_OP_XOR3, 9, 5, 3},
+ {EC_GF_OP_XOR2, 0, 9, 0}, {EC_GF_OP_XOR2, 1, 4, 0},
+ {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 6, 0, 0},
+ {EC_GF_OP_XOR2, 4, 9, 0}, {EC_GF_OP_XOR2, 0, 2, 0},
+ {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 2, 6, 0},
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 2, 8, 0},
+ {EC_GF_OP_XOR2, 7, 5, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_E1 = {10,
+ {
+ 0,
+ 7,
+ 1,
+ 3,
+ 4,
+ 5,
+ 6,
+ 2,
+ 8,
+ 9,
+ },
+ ec_gf8_mul_E1_ops};
+
+static ec_gf_op_t ec_gf8_mul_E2_ops[] = {
+ {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_XOR2, 5, 1, 0},
+ {EC_GF_OP_XOR2, 6, 2, 0}, {EC_GF_OP_XOR2, 1, 2, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 7, 2, 0},
+ {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 2, 5, 0},
+ {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_XOR2, 7, 3, 0},
+ {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_E2 = {8,
+ {
+ 2,
+ 3,
+ 7,
+ 1,
+ 5,
+ 6,
+ 0,
+ 4,
+ },
+ ec_gf8_mul_E2_ops};
+
+static ec_gf_op_t ec_gf8_mul_E3_ops[] = {
+ {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 3, 1, 0},
+ {EC_GF_OP_XOR3, 8, 2, 7}, {EC_GF_OP_XOR2, 4, 5, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 1, 4, 0},
+ {EC_GF_OP_XOR2, 0, 8, 0}, {EC_GF_OP_XOR2, 4, 6, 0},
+ {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR3, 6, 8, 4},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_E3 = {9,
+ {
+ 5,
+ 4,
+ 7,
+ 2,
+ 1,
+ 3,
+ 6,
+ 0,
+ 8,
+ },
+ ec_gf8_mul_E3_ops};
+
+static ec_gf_op_t ec_gf8_mul_E4_ops[] = {
+ {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 2, 6, 0},
+ {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 1, 2, 0},
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 4, 5, 0},
+ {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 7, 3, 0},
+ {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_XOR2, 4, 2, 0},
+ {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_E4 = {8,
+ {
+ 7,
+ 0,
+ 1,
+ 6,
+ 3,
+ 4,
+ 2,
+ 5,
+ },
+ ec_gf8_mul_E4_ops};
+
+static ec_gf_op_t ec_gf8_mul_E5_ops[] = {
+ {EC_GF_OP_XOR2, 7, 5, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_COPY, 8, 0, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 5, 2, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 4, 2, 0},
+ {EC_GF_OP_XOR2, 7, 5, 0}, {EC_GF_OP_XOR2, 2, 3, 0},
+ {EC_GF_OP_XOR2, 3, 8, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_E5 = {9,
+ {
+ 4,
+ 5,
+ 3,
+ 6,
+ 7,
+ 1,
+ 0,
+ 2,
+ 8,
+ },
+ ec_gf8_mul_E5_ops};
+
+static ec_gf_op_t ec_gf8_mul_E6_ops[] = {
+ {EC_GF_OP_XOR2, 6, 2, 0}, {EC_GF_OP_XOR2, 1, 6, 0},
+ {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 0, 3, 0},
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 0, 6, 0},
+ {EC_GF_OP_XOR2, 7, 5, 0}, {EC_GF_OP_XOR2, 4, 0, 0},
+ {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 3, 4, 0},
+ {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 2, 3, 0},
+ {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_E6 = {8,
+ {
+ 5,
+ 4,
+ 3,
+ 6,
+ 7,
+ 0,
+ 1,
+ 2,
+ },
+ ec_gf8_mul_E6_ops};
+
+static ec_gf_op_t ec_gf8_mul_E7_ops[] = {
+ {EC_GF_OP_COPY, 8, 6, 0}, {EC_GF_OP_XOR2, 3, 2, 0},
+ {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR3, 9, 0, 6}, {EC_GF_OP_XOR2, 4, 9, 0},
+ {EC_GF_OP_XOR2, 5, 9, 0}, {EC_GF_OP_XOR2, 3, 4, 0},
+ {EC_GF_OP_XOR2, 7, 5, 0}, {EC_GF_OP_XOR2, 4, 1, 0},
+ {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 1, 7, 0},
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 7, 8, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_E7 = {10,
+ {
+ 1,
+ 4,
+ 3,
+ 6,
+ 7,
+ 5,
+ 2,
+ 0,
+ 8,
+ 9,
+ },
+ ec_gf8_mul_E7_ops};
+
+static ec_gf_op_t ec_gf8_mul_E8_ops[] = {
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 4, 2, 0}, {EC_GF_OP_XOR2, 2, 5, 0},
+ {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 5, 1, 0},
+ {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 6, 5, 0},
+ {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_XOR2, 7, 6, 0},
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 6, 2, 0},
+ {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 1, 4, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_E8 = {8,
+ {
+ 1,
+ 4,
+ 2,
+ 7,
+ 3,
+ 0,
+ 5,
+ 6,
+ },
+ ec_gf8_mul_E8_ops};
+
+static ec_gf_op_t ec_gf8_mul_E9_ops[] = {
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_COPY, 8, 1, 0},
+ {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 2, 1, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 5, 1, 0},
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 6, 7, 0},
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 0, 3, 0},
+ {EC_GF_OP_XOR3, 1, 8, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_E9 = {9,
+ {
+ 6,
+ 2,
+ 0,
+ 3,
+ 4,
+ 1,
+ 5,
+ 7,
+ 8,
+ },
+ ec_gf8_mul_E9_ops};
+
+static ec_gf_op_t ec_gf8_mul_EA_ops[] = {
+ {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 3, 2, 0},
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_XOR2, 5, 2, 0},
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 4, 1, 0},
+ {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 5, 4, 0},
+ {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_EA = {8,
+ {
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 0,
+ 1,
+ 2,
+ },
+ ec_gf8_mul_EA_ops};
+
+static ec_gf_op_t ec_gf8_mul_EB_ops[] = {
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 2, 1, 0},
+ {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 2, 7, 0},
+ {EC_GF_OP_XOR2, 7, 5, 0}, {EC_GF_OP_XOR2, 5, 4, 0},
+ {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_XOR2, 1, 6, 0},
+ {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 6, 5, 0},
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 6, 4, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_EB = {8,
+ {
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 0,
+ 1,
+ 2,
+ },
+ ec_gf8_mul_EB_ops};
+
+static ec_gf_op_t ec_gf8_mul_EC_ops[] = {
+ {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR3, 8, 4, 0}, {EC_GF_OP_XOR2, 1, 8, 0},
+ {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 6, 2, 0},
+ {EC_GF_OP_XOR2, 3, 8, 0}, {EC_GF_OP_XOR2, 2, 7, 0},
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_XOR2, 4, 2, 0}, {EC_GF_OP_XOR2, 6, 0, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_EC = {9,
+ {
+ 7,
+ 4,
+ 3,
+ 0,
+ 2,
+ 5,
+ 1,
+ 6,
+ 8,
+ },
+ ec_gf8_mul_EC_ops};
+
+static ec_gf_op_t ec_gf8_mul_ED_ops[] = {
+ {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 4, 0, 0},
+ {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 6, 4, 0},
+ {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 7, 3, 0},
+ {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_XOR2, 6, 2, 0},
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 1, 6, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_ED = {8,
+ {
+ 5,
+ 6,
+ 7,
+ 0,
+ 1,
+ 4,
+ 3,
+ 2,
+ },
+ ec_gf8_mul_ED_ops};
+
+static ec_gf_op_t ec_gf8_mul_EE_ops[] = {
+ {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 3, 0, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR3, 8, 2, 3},
+ {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 4, 8, 0},
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 8, 5, 0},
+ {EC_GF_OP_XOR2, 4, 7, 0}, {EC_GF_OP_XOR2, 5, 6, 0},
+ {EC_GF_OP_XOR2, 1, 8, 0}, {EC_GF_OP_XOR2, 7, 8, 0},
+ {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_EE = {9,
+ {
+ 6,
+ 4,
+ 5,
+ 7,
+ 2,
+ 3,
+ 0,
+ 1,
+ 8,
+ },
+ ec_gf8_mul_EE_ops};
+
+static ec_gf_op_t ec_gf8_mul_EF_ops[] = {
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 1, 3, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_COPY, 8, 0, 0},
+ {EC_GF_OP_XOR2, 8, 2, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 7, 8, 0},
+ {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 6, 8, 0},
+ {EC_GF_OP_XOR2, 4, 7, 0}, {EC_GF_OP_XOR2, 3, 6, 0},
+ {EC_GF_OP_XOR2, 7, 5, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_EF = {9,
+ {
+ 6,
+ 4,
+ 5,
+ 7,
+ 2,
+ 0,
+ 3,
+ 1,
+ 8,
+ },
+ ec_gf8_mul_EF_ops};
+
+static ec_gf_op_t ec_gf8_mul_F0_ops[] = {
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 6, 2, 0},
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR3, 8, 3, 6},
+ {EC_GF_OP_XOR2, 5, 8, 0}, {EC_GF_OP_XOR2, 8, 4, 0},
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 7, 8, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 2, 7, 0},
+ {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 1, 8, 0},
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 3, 0, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_F0 = {9,
+ {
+ 3,
+ 4,
+ 6,
+ 1,
+ 2,
+ 0,
+ 5,
+ 7,
+ 8,
+ },
+ ec_gf8_mul_F0_ops};
+
+static ec_gf_op_t ec_gf8_mul_F1_ops[] = {
+ {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 3, 5, 0},
+ {EC_GF_OP_COPY, 8, 3, 0}, {EC_GF_OP_XOR2, 3, 4, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_COPY, 9, 2, 0},
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 9, 0, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 5, 2, 0},
+ {EC_GF_OP_XOR2, 7, 9, 0}, {EC_GF_OP_XOR2, 4, 9, 0},
+ {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_XOR3, 9, 8, 7},
+ {EC_GF_OP_XOR2, 1, 9, 0}, {EC_GF_OP_XOR2, 5, 9, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_F1 = {10,
+ {
+ 7,
+ 2,
+ 6,
+ 3,
+ 5,
+ 1,
+ 4,
+ 0,
+ 8,
+ 9,
+ },
+ ec_gf8_mul_F1_ops};
+
+static ec_gf_op_t ec_gf8_mul_F2_ops[] = {
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 5, 4, 0},
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 7, 2, 0},
+ {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 6, 7, 0},
+ {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 2, 3, 0},
+ {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR3, 8, 6, 4},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_XOR2, 3, 8, 0}, {EC_GF_OP_XOR2, 5, 8, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_F2 = {9,
+ {
+ 1,
+ 0,
+ 6,
+ 7,
+ 4,
+ 5,
+ 2,
+ 3,
+ 8,
+ },
+ ec_gf8_mul_F2_ops};
+
+static ec_gf_op_t ec_gf8_mul_F3_ops[] = {
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 2, 1, 0},
+ {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 1, 6, 0},
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_F3 = {8,
+ {
+ 5,
+ 6,
+ 7,
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ },
+ ec_gf8_mul_F3_ops};
+
+static ec_gf_op_t ec_gf8_mul_F4_ops[] = {
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 2, 1, 0},
+ {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 6, 5, 0},
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 3, 7, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_F4 = {8,
+ {
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ },
+ ec_gf8_mul_F4_ops};
+
+static ec_gf_op_t ec_gf8_mul_F5_ops[] = {
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 2, 1, 0},
+ {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 6, 5, 0},
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_F5 = {8,
+ {
+ 7,
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ },
+ ec_gf8_mul_F5_ops};
+
+static ec_gf_op_t ec_gf8_mul_F6_ops[] = {
+ {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_COPY, 8, 3, 0},
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_COPY, 9, 3, 0}, {EC_GF_OP_XOR2, 3, 2, 0},
+ {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_XOR2, 4, 2, 0},
+ {EC_GF_OP_XOR2, 9, 4, 0}, {EC_GF_OP_XOR2, 4, 1, 0},
+ {EC_GF_OP_XOR2, 6, 9, 0}, {EC_GF_OP_XOR2, 7, 6, 0},
+ {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_XOR2, 5, 7, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR3, 7, 8, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_F6 = {10,
+ {
+ 0,
+ 6,
+ 2,
+ 7,
+ 4,
+ 3,
+ 5,
+ 9,
+ 1,
+ 8,
+ },
+ ec_gf8_mul_F6_ops};
+
+static ec_gf_op_t ec_gf8_mul_F7_ops[] = {
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 2, 1, 0},
+ {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 6, 5, 0},
+ {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_XOR2, 2, 7, 0},
+ {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_XOR2, 7, 6, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_F7 = {8,
+ {
+ 6,
+ 7,
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ },
+ ec_gf8_mul_F7_ops};
+
+static ec_gf_op_t ec_gf8_mul_F8_ops[] = {
+ {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 3, 5, 0},
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 1, 6, 0},
+ {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 5, 1, 0},
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 7, 5, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 6, 7, 0},
+ {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_F8 = {8,
+ {
+ 6,
+ 2,
+ 0,
+ 1,
+ 4,
+ 5,
+ 3,
+ 7,
+ },
+ ec_gf8_mul_F8_ops};
+
+static ec_gf_op_t ec_gf8_mul_F9_ops[] = {
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 6, 0, 0},
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 6, 4, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 3, 6, 0},
+ {EC_GF_OP_XOR3, 8, 7, 1}, {EC_GF_OP_XOR2, 1, 3, 0},
+ {EC_GF_OP_XOR2, 4, 8, 0}, {EC_GF_OP_XOR2, 5, 8, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_F9 = {9,
+ {
+ 4,
+ 1,
+ 7,
+ 6,
+ 0,
+ 3,
+ 5,
+ 2,
+ 8,
+ },
+ ec_gf8_mul_F9_ops};
+
+static ec_gf_op_t ec_gf8_mul_FA_ops[] = {
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 0, 3, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 4, 7, 0},
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 2, 6, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_FA = {8,
+ {
+ 0,
+ 1,
+ 2,
+ 4,
+ 5,
+ 6,
+ 7,
+ 3,
+ },
+ ec_gf8_mul_FA_ops};
+
+static ec_gf_op_t ec_gf8_mul_FB_ops[] = {
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 2, 1, 0},
+ {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_XOR2, 3, 2, 0},
+ {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_XOR2, 2, 7, 0},
+ {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_XOR2, 7, 6, 0},
+ {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 6, 5, 0},
+ {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 5, 4, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_FB = {8,
+ {
+ 4,
+ 5,
+ 6,
+ 7,
+ 0,
+ 1,
+ 2,
+ 3,
+ },
+ ec_gf8_mul_FB_ops};
+
+static ec_gf_op_t ec_gf8_mul_FC_ops[] = {
+ {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 7, 4, 0},
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_COPY, 9, 3, 0},
+ {EC_GF_OP_XOR3, 8, 5, 7}, {EC_GF_OP_XOR2, 3, 6, 0},
+ {EC_GF_OP_XOR2, 8, 3, 0}, {EC_GF_OP_XOR2, 2, 8, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 4, 2, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 3, 4, 0},
+ {EC_GF_OP_XOR2, 5, 0, 0}, {EC_GF_OP_XOR2, 6, 0, 0},
+ {EC_GF_OP_XOR3, 0, 9, 2}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_FC = {10,
+ {
+ 5,
+ 6,
+ 3,
+ 7,
+ 1,
+ 8,
+ 0,
+ 4,
+ 2,
+ 9,
+ },
+ ec_gf8_mul_FC_ops};
+
+static ec_gf_op_t ec_gf8_mul_FD_ops[] = {
+ {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_COPY, 8, 7, 0},
+ {EC_GF_OP_XOR2, 5, 0, 0}, {EC_GF_OP_XOR2, 7, 5, 0},
+ {EC_GF_OP_XOR2, 4, 2, 0}, {EC_GF_OP_XOR2, 4, 7, 0},
+ {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 1, 2, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR3, 1, 8, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_FD = {9,
+ {
+ 5,
+ 3,
+ 7,
+ 6,
+ 1,
+ 2,
+ 4,
+ 0,
+ 8,
+ },
+ ec_gf8_mul_FD_ops};
+
+static ec_gf_op_t ec_gf8_mul_FE_ops[] = {
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_COPY, 8, 2, 0},
+ {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 6, 2, 0},
+ {EC_GF_OP_XOR2, 8, 5, 0}, {EC_GF_OP_XOR2, 5, 6, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 0, 6, 0},
+ {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 7, 3, 0},
+ {EC_GF_OP_XOR2, 7, 8, 0}, {EC_GF_OP_XOR2, 3, 0, 0},
+ {EC_GF_OP_XOR2, 4, 7, 0}, {EC_GF_OP_XOR2, 1, 7, 0},
+ {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_FE = {9,
+ {
+ 3,
+ 4,
+ 8,
+ 2,
+ 5,
+ 0,
+ 6,
+ 1,
+ 7,
+ },
+ ec_gf8_mul_FE_ops};
+
+static ec_gf_op_t ec_gf8_mul_FF_ops[] = {
+ {EC_GF_OP_XOR2, 4, 7, 0}, {EC_GF_OP_COPY, 9, 0, 0},
+ {EC_GF_OP_COPY, 8, 4, 0}, {EC_GF_OP_XOR2, 9, 1, 0},
+ {EC_GF_OP_XOR2, 4, 2, 0}, {EC_GF_OP_XOR2, 9, 4, 0},
+ {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_XOR2, 3, 9, 0}, {EC_GF_OP_XOR2, 7, 3, 0},
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 1, 7, 0},
+ {EC_GF_OP_XOR3, 3, 8, 5}, {EC_GF_OP_XOR2, 4, 6, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_FF = {10,
+ {
+ 6,
+ 5,
+ 0,
+ 1,
+ 2,
+ 4,
+ 9,
+ 3,
+ 7,
+ 8,
+ },
+ ec_gf8_mul_FF_ops};
+
+ec_gf_mul_t *ec_gf8_mul[] = {
+ &ec_gf8_mul_00, &ec_gf8_mul_01, &ec_gf8_mul_02, &ec_gf8_mul_03,
+ &ec_gf8_mul_04, &ec_gf8_mul_05, &ec_gf8_mul_06, &ec_gf8_mul_07,
+ &ec_gf8_mul_08, &ec_gf8_mul_09, &ec_gf8_mul_0A, &ec_gf8_mul_0B,
+ &ec_gf8_mul_0C, &ec_gf8_mul_0D, &ec_gf8_mul_0E, &ec_gf8_mul_0F,
+ &ec_gf8_mul_10, &ec_gf8_mul_11, &ec_gf8_mul_12, &ec_gf8_mul_13,
+ &ec_gf8_mul_14, &ec_gf8_mul_15, &ec_gf8_mul_16, &ec_gf8_mul_17,
+ &ec_gf8_mul_18, &ec_gf8_mul_19, &ec_gf8_mul_1A, &ec_gf8_mul_1B,
+ &ec_gf8_mul_1C, &ec_gf8_mul_1D, &ec_gf8_mul_1E, &ec_gf8_mul_1F,
+ &ec_gf8_mul_20, &ec_gf8_mul_21, &ec_gf8_mul_22, &ec_gf8_mul_23,
+ &ec_gf8_mul_24, &ec_gf8_mul_25, &ec_gf8_mul_26, &ec_gf8_mul_27,
+ &ec_gf8_mul_28, &ec_gf8_mul_29, &ec_gf8_mul_2A, &ec_gf8_mul_2B,
+ &ec_gf8_mul_2C, &ec_gf8_mul_2D, &ec_gf8_mul_2E, &ec_gf8_mul_2F,
+ &ec_gf8_mul_30, &ec_gf8_mul_31, &ec_gf8_mul_32, &ec_gf8_mul_33,
+ &ec_gf8_mul_34, &ec_gf8_mul_35, &ec_gf8_mul_36, &ec_gf8_mul_37,
+ &ec_gf8_mul_38, &ec_gf8_mul_39, &ec_gf8_mul_3A, &ec_gf8_mul_3B,
+ &ec_gf8_mul_3C, &ec_gf8_mul_3D, &ec_gf8_mul_3E, &ec_gf8_mul_3F,
+ &ec_gf8_mul_40, &ec_gf8_mul_41, &ec_gf8_mul_42, &ec_gf8_mul_43,
+ &ec_gf8_mul_44, &ec_gf8_mul_45, &ec_gf8_mul_46, &ec_gf8_mul_47,
+ &ec_gf8_mul_48, &ec_gf8_mul_49, &ec_gf8_mul_4A, &ec_gf8_mul_4B,
+ &ec_gf8_mul_4C, &ec_gf8_mul_4D, &ec_gf8_mul_4E, &ec_gf8_mul_4F,
+ &ec_gf8_mul_50, &ec_gf8_mul_51, &ec_gf8_mul_52, &ec_gf8_mul_53,
+ &ec_gf8_mul_54, &ec_gf8_mul_55, &ec_gf8_mul_56, &ec_gf8_mul_57,
+ &ec_gf8_mul_58, &ec_gf8_mul_59, &ec_gf8_mul_5A, &ec_gf8_mul_5B,
+ &ec_gf8_mul_5C, &ec_gf8_mul_5D, &ec_gf8_mul_5E, &ec_gf8_mul_5F,
+ &ec_gf8_mul_60, &ec_gf8_mul_61, &ec_gf8_mul_62, &ec_gf8_mul_63,
+ &ec_gf8_mul_64, &ec_gf8_mul_65, &ec_gf8_mul_66, &ec_gf8_mul_67,
+ &ec_gf8_mul_68, &ec_gf8_mul_69, &ec_gf8_mul_6A, &ec_gf8_mul_6B,
+ &ec_gf8_mul_6C, &ec_gf8_mul_6D, &ec_gf8_mul_6E, &ec_gf8_mul_6F,
+ &ec_gf8_mul_70, &ec_gf8_mul_71, &ec_gf8_mul_72, &ec_gf8_mul_73,
+ &ec_gf8_mul_74, &ec_gf8_mul_75, &ec_gf8_mul_76, &ec_gf8_mul_77,
+ &ec_gf8_mul_78, &ec_gf8_mul_79, &ec_gf8_mul_7A, &ec_gf8_mul_7B,
+ &ec_gf8_mul_7C, &ec_gf8_mul_7D, &ec_gf8_mul_7E, &ec_gf8_mul_7F,
+ &ec_gf8_mul_80, &ec_gf8_mul_81, &ec_gf8_mul_82, &ec_gf8_mul_83,
+ &ec_gf8_mul_84, &ec_gf8_mul_85, &ec_gf8_mul_86, &ec_gf8_mul_87,
+ &ec_gf8_mul_88, &ec_gf8_mul_89, &ec_gf8_mul_8A, &ec_gf8_mul_8B,
+ &ec_gf8_mul_8C, &ec_gf8_mul_8D, &ec_gf8_mul_8E, &ec_gf8_mul_8F,
+ &ec_gf8_mul_90, &ec_gf8_mul_91, &ec_gf8_mul_92, &ec_gf8_mul_93,
+ &ec_gf8_mul_94, &ec_gf8_mul_95, &ec_gf8_mul_96, &ec_gf8_mul_97,
+ &ec_gf8_mul_98, &ec_gf8_mul_99, &ec_gf8_mul_9A, &ec_gf8_mul_9B,
+ &ec_gf8_mul_9C, &ec_gf8_mul_9D, &ec_gf8_mul_9E, &ec_gf8_mul_9F,
+ &ec_gf8_mul_A0, &ec_gf8_mul_A1, &ec_gf8_mul_A2, &ec_gf8_mul_A3,
+ &ec_gf8_mul_A4, &ec_gf8_mul_A5, &ec_gf8_mul_A6, &ec_gf8_mul_A7,
+ &ec_gf8_mul_A8, &ec_gf8_mul_A9, &ec_gf8_mul_AA, &ec_gf8_mul_AB,
+ &ec_gf8_mul_AC, &ec_gf8_mul_AD, &ec_gf8_mul_AE, &ec_gf8_mul_AF,
+ &ec_gf8_mul_B0, &ec_gf8_mul_B1, &ec_gf8_mul_B2, &ec_gf8_mul_B3,
+ &ec_gf8_mul_B4, &ec_gf8_mul_B5, &ec_gf8_mul_B6, &ec_gf8_mul_B7,
+ &ec_gf8_mul_B8, &ec_gf8_mul_B9, &ec_gf8_mul_BA, &ec_gf8_mul_BB,
+ &ec_gf8_mul_BC, &ec_gf8_mul_BD, &ec_gf8_mul_BE, &ec_gf8_mul_BF,
+ &ec_gf8_mul_C0, &ec_gf8_mul_C1, &ec_gf8_mul_C2, &ec_gf8_mul_C3,
+ &ec_gf8_mul_C4, &ec_gf8_mul_C5, &ec_gf8_mul_C6, &ec_gf8_mul_C7,
+ &ec_gf8_mul_C8, &ec_gf8_mul_C9, &ec_gf8_mul_CA, &ec_gf8_mul_CB,
+ &ec_gf8_mul_CC, &ec_gf8_mul_CD, &ec_gf8_mul_CE, &ec_gf8_mul_CF,
+ &ec_gf8_mul_D0, &ec_gf8_mul_D1, &ec_gf8_mul_D2, &ec_gf8_mul_D3,
+ &ec_gf8_mul_D4, &ec_gf8_mul_D5, &ec_gf8_mul_D6, &ec_gf8_mul_D7,
+ &ec_gf8_mul_D8, &ec_gf8_mul_D9, &ec_gf8_mul_DA, &ec_gf8_mul_DB,
+ &ec_gf8_mul_DC, &ec_gf8_mul_DD, &ec_gf8_mul_DE, &ec_gf8_mul_DF,
+ &ec_gf8_mul_E0, &ec_gf8_mul_E1, &ec_gf8_mul_E2, &ec_gf8_mul_E3,
+ &ec_gf8_mul_E4, &ec_gf8_mul_E5, &ec_gf8_mul_E6, &ec_gf8_mul_E7,
+ &ec_gf8_mul_E8, &ec_gf8_mul_E9, &ec_gf8_mul_EA, &ec_gf8_mul_EB,
+ &ec_gf8_mul_EC, &ec_gf8_mul_ED, &ec_gf8_mul_EE, &ec_gf8_mul_EF,
+ &ec_gf8_mul_F0, &ec_gf8_mul_F1, &ec_gf8_mul_F2, &ec_gf8_mul_F3,
+ &ec_gf8_mul_F4, &ec_gf8_mul_F5, &ec_gf8_mul_F6, &ec_gf8_mul_F7,
+ &ec_gf8_mul_F8, &ec_gf8_mul_F9, &ec_gf8_mul_FA, &ec_gf8_mul_FB,
+ &ec_gf8_mul_FC, &ec_gf8_mul_FD, &ec_gf8_mul_FE, &ec_gf8_mul_FF};
diff --git a/xlators/cluster/ec/src/ec-gf.h b/xlators/cluster/ec/src/ec-gf8.h
index 23bca91e3b5..4aca91127fc 100644
--- a/xlators/cluster/ec/src/ec-gf.h
+++ b/xlators/cluster/ec/src/ec-gf8.h
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2012-2014 DataLab, s.l. <http://www.datalab.es>
+ Copyright (c) 2015 DataLab, s.l. <http://www.datalab.es>
This file is part of GlusterFS.
This file is licensed to you under your choice of the GNU Lesser
@@ -11,13 +11,8 @@
#ifndef __EC_GF8_H__
#define __EC_GF8_H__
-#define EC_GF_BITS 8
-#define EC_GF_MOD 0x11D
+#include "ec-galois.h"
-#define EC_GF_SIZE (1 << EC_GF_BITS)
-#define EC_GF_WORD_SIZE sizeof(uint64_t)
-
-extern void (* ec_gf_muladd[])(uint8_t * out, uint8_t * in,
- unsigned int width);
+extern ec_gf_mul_t *ec_gf8_mul[];
#endif /* __EC_GF8_H__ */
diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
index b8560005b94..7d991f04aac 100644
--- a/xlators/cluster/ec/src/ec-heal.c
+++ b/xlators/cluster/ec/src/ec-heal.c
@@ -8,158 +8,164 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
-#include "compat-errno.h"
-
+#include <glusterfs/defaults.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/syncop.h>
+#include <glusterfs/syncop-utils.h>
+#include <glusterfs/cluster-syncop.h>
+
+#include "ec.h"
+#include "ec-types.h"
+#include "ec-messages.h"
#include "ec-helpers.h"
#include "ec-common.h"
#include "ec-combine.h"
#include "ec-method.h"
#include "ec-fops.h"
-
-#include "ec-mem-types.h"
-#include "ec-data.h"
-
-static char *ec_ignore_xattrs[] = {
- GF_SELINUX_XATTR_KEY,
- QUOTA_SIZE_KEY,
- NULL
+#include "ec-heald.h"
+
+#define EC_COUNT(array, max) \
+ ({ \
+ int __i; \
+ int __res = 0; \
+ for (__i = 0; __i < max; __i++) \
+ if (array[__i]) \
+ __res++; \
+ __res; \
+ })
+#define EC_INTERSECT(dst, src1, src2, max) \
+ ({ \
+ int __i; \
+ for (__i = 0; __i < max; __i++) \
+ dst[__i] = src1[__i] && src2[__i]; \
+ })
+#define EC_ADJUST_SOURCE(source, sources, max) \
+ ({ \
+ int __i; \
+ if (sources[source] == 0) { \
+ source = -1; \
+ for (__i = 0; __i < max; __i++) \
+ if (sources[__i]) \
+ source = __i; \
+ } \
+ })
+#define IA_EQUAL(f, s, field) \
+ (memcmp(&(f.ia_##field), &(s.ia_##field), sizeof(s.ia_##field)) == 0)
+#define EC_REPLIES_ALLOC(replies, numsubvols) \
+ do { \
+ int __i = 0; \
+ replies = alloca0(numsubvols * sizeof(*replies)); \
+ for (__i = 0; __i < numsubvols; __i++) \
+ INIT_LIST_HEAD(&replies[__i].entries.list); \
+ } while (0)
+
+struct ec_name_data {
+ call_frame_t *frame;
+ unsigned char *participants;
+ unsigned char *failed_on;
+ unsigned char *gfidless;
+ unsigned char *enoent;
+ unsigned char *same;
+ char *name;
+ inode_t *parent;
+ default_args_cbk_t *replies;
+ uint32_t heal_pending;
};
+static char *ec_ignore_xattrs[] = {GF_SELINUX_XATTR_KEY, QUOTA_SIZE_KEY, NULL};
+
static gf_boolean_t
-ec_ignorable_key_match (dict_t *dict, char *key, data_t *val, void *mdata)
+ec_ignorable_key_match(dict_t *dict, char *key, data_t *val, void *mdata)
{
- int i = 0;
+ int i = 0;
- if (!key)
- goto out;
+ if (!key)
+ goto out;
- for (i = 0; ec_ignore_xattrs[i]; i++) {
- if (!strcmp (key, ec_ignore_xattrs[i]))
- return _gf_true;
- }
+ if (strncmp(key, EC_XATTR_PREFIX, SLEN(EC_XATTR_PREFIX)) == 0)
+ return _gf_true;
+
+ for (i = 0; ec_ignore_xattrs[i]; i++) {
+ if (!strcmp(key, ec_ignore_xattrs[i]))
+ return _gf_true;
+ }
out:
- return _gf_false;
+ return _gf_false;
}
-/* FOP: heal */
-
-void ec_heal_exclude(ec_heal_t * heal, uintptr_t mask)
+static gf_boolean_t
+ec_sh_key_match(dict_t *dict, char *key, data_t *val, void *mdata)
{
- LOCK(&heal->lock);
-
- heal->bad &= ~mask;
-
- UNLOCK(&heal->lock);
+ return !ec_ignorable_key_match(dict, key, val, mdata);
}
+/* FOP: heal */
-void ec_heal_lookup_resume(ec_fop_data_t * fop)
+void
+ec_set_entry_healing(ec_fop_data_t *fop)
{
- ec_heal_t * heal = fop->data;
- ec_cbk_data_t * cbk;
- uintptr_t good = 0, bad = 0;
+ ec_inode_t *ctx = NULL;
+ loc_t *loc = NULL;
- if (heal->lookup != NULL)
- {
- ec_fop_data_release(heal->lookup);
- }
- ec_fop_data_acquire(fop);
+ if (!fop)
+ return;
- list_for_each_entry(cbk, &fop->cbk_list, list)
+ loc = &fop->loc[0];
+ LOCK(&loc->inode->lock);
{
- if ((cbk->op_ret < 0) && (cbk->op_errno == ENOTCONN))
- {
- continue;
- }
-
- if (cbk == fop->answer)
- {
- if (cbk->op_ret >= 0)
- {
- heal->iatt = cbk->iatt[0];
- heal->version = cbk->version;
- heal->raw_size = cbk->size;
- heal->fop->pre_size = cbk->iatt[0].ia_size;
- heal->fop->post_size = cbk->iatt[0].ia_size;
- heal->fop->have_size = 1;
-
- if (ec_loc_update(heal->xl, &heal->loc, cbk->inode,
- &cbk->iatt[0]) != 0)
- {
- fop->answer = NULL;
- fop->error = EIO;
-
- bad |= cbk->mask;
-
- continue;
- }
- }
-
- good |= cbk->mask;
- }
- else
- {
- bad |= cbk->mask;
+ ctx = __ec_inode_get(loc->inode, fop->xl);
+ if (ctx) {
+ ctx->heal_count += 1;
}
}
-
- /* Heal lookups are not executed concurrently with anything else. So, when
- * a lookup finishes, it's safe to access heal->good and heal->bad without
- * acquiring any lock.
- */
- heal->good = good;
- heal->bad = bad;
-
- heal->lookup = fop;
-
- ec_resume_parent(fop, fop->answer != NULL ? 0 : fop->error);
+ UNLOCK(&loc->inode->lock);
}
-int32_t ec_heal_entry_lookup_cbk(call_frame_t * frame, void * cookie,
- xlator_t * this, int32_t op_ret,
- int32_t op_errno, inode_t * inode,
- struct iatt * buf, dict_t * xdata,
- struct iatt * postparent)
-{
- ec_heal_lookup_resume(cookie);
-
- return 0;
-}
-
-int32_t ec_heal_inode_lookup_cbk(call_frame_t * frame, void * cookie,
- xlator_t * this, int32_t op_ret,
- int32_t op_errno, inode_t * inode,
- struct iatt * buf, dict_t * xdata,
- struct iatt * postparent)
+void
+ec_reset_entry_healing(ec_fop_data_t *fop)
{
- ec_heal_lookup_resume(cookie);
+ ec_inode_t *ctx = NULL;
+ loc_t *loc = NULL;
+ int32_t heal_count = 0;
+ if (!fop)
+ return;
- return 0;
+ loc = &fop->loc[0];
+ LOCK(&loc->inode->lock);
+ {
+ ctx = __ec_inode_get(loc->inode, fop->xl);
+ if (ctx) {
+ ctx->heal_count += -1;
+ heal_count = ctx->heal_count;
+ }
+ }
+ UNLOCK(&loc->inode->lock);
+ GF_ASSERT(heal_count >= 0);
}
-uintptr_t ec_heal_check(ec_fop_data_t * fop, uintptr_t * pgood)
+uintptr_t
+ec_heal_check(ec_fop_data_t *fop, uintptr_t *pgood)
{
- ec_cbk_data_t * cbk;
- uintptr_t mask[2] = { 0, 0 };
+ ec_cbk_data_t *cbk;
+ uintptr_t mask[2] = {0, 0};
list_for_each_entry(cbk, &fop->cbk_list, list)
{
mask[cbk->op_ret >= 0] |= cbk->mask;
}
- if (pgood != NULL)
- {
+ if (pgood != NULL) {
*pgood = mask[1];
}
return mask[0];
}
-void ec_heal_update(ec_fop_data_t * fop, int32_t is_open)
+void
+ec_heal_update(ec_fop_data_t *fop, int32_t is_open)
{
- ec_heal_t * heal = fop->data;
+ ec_heal_t *heal = fop->data;
uintptr_t good, bad;
bad = ec_heal_check(fop, &good);
@@ -167,8 +173,7 @@ void ec_heal_update(ec_fop_data_t * fop, int32_t is_open)
LOCK(&heal->lock);
heal->bad &= ~bad;
- if (is_open)
- {
+ if (is_open) {
heal->open |= good;
}
@@ -177,9 +182,10 @@ void ec_heal_update(ec_fop_data_t * fop, int32_t is_open)
fop->error = 0;
}
-void ec_heal_avoid(ec_fop_data_t * fop)
+void
+ec_heal_avoid(ec_fop_data_t *fop)
{
- ec_heal_t * heal = fop->data;
+ ec_heal_t *heal = fop->data;
uintptr_t bad;
bad = ec_heal_check(fop, NULL);
@@ -191,1400 +197,3171 @@ void ec_heal_avoid(ec_fop_data_t * fop)
UNLOCK(&heal->lock);
}
-int32_t ec_heal_mkdir_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
- int32_t op_ret, int32_t op_errno, inode_t * inode,
- struct iatt * buf, struct iatt * preparent,
- struct iatt * postparent, dict_t * xdata)
+int32_t
+ec_heal_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- ec_heal_update(cookie, 0);
+ ec_fop_data_t *fop = cookie;
+ ec_heal_t *heal = fop->data;
+
+ if (op_ret >= 0) {
+ GF_ASSERT(
+ ec_set_inode_size(heal->fop, heal->fd->inode, heal->total_size));
+ }
return 0;
}
-int32_t ec_heal_mknod_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
- int32_t op_ret, int32_t op_errno, inode_t * inode,
- struct iatt * buf, struct iatt * preparent,
- struct iatt * postparent, dict_t * xdata)
+void
+ec_heal_lock(ec_heal_t *heal, int32_t type, fd_t *fd, loc_t *loc, off_t offset,
+ size_t size)
{
- ec_heal_update(cookie, 0);
+ struct gf_flock flock;
+ fop_inodelk_cbk_t cbk = NULL;
- return 0;
+ flock.l_type = type;
+ flock.l_whence = SEEK_SET;
+ flock.l_start = offset;
+ flock.l_len = size;
+ flock.l_pid = 0;
+ flock.l_owner.len = 0;
+
+ if (type == F_UNLCK) {
+ /* Remove inode size information before unlocking it. */
+ if (fd == NULL) {
+ ec_clear_inode_info(heal->fop, heal->loc.inode);
+ } else {
+ ec_clear_inode_info(heal->fop, heal->fd->inode);
+ }
+ cbk = ec_lock_unlocked;
+ } else {
+ /* Otherwise use the callback to update size information. */
+ cbk = ec_heal_lock_cbk;
+ }
+
+ if (fd != NULL) {
+ ec_finodelk(heal->fop->frame, heal->xl,
+ &heal->fop->frame->root->lk_owner, heal->fop->mask,
+ EC_MINIMUM_ALL, cbk, heal, heal->xl->name, fd, F_SETLKW,
+ &flock, NULL);
+ } else {
+ ec_inodelk(heal->fop->frame, heal->xl,
+ &heal->fop->frame->root->lk_owner, heal->fop->mask,
+ EC_MINIMUM_ALL, cbk, heal, heal->xl->name, loc, F_SETLKW,
+ &flock, NULL);
+ }
}
-int32_t ec_heal_symlink_cbk(call_frame_t * frame, void * cookie,
- xlator_t * this, int32_t op_ret, int32_t op_errno,
- inode_t * inode, struct iatt * buf,
- struct iatt * preparent, struct iatt * postparent,
- dict_t * xdata)
+void
+ec_heal_inodelk(ec_heal_t *heal, int32_t type, int32_t use_fd, off_t offset,
+ size_t size)
{
- ec_heal_update(cookie, 0);
+ ec_heal_lock(heal, type, use_fd ? heal->fd : NULL, &heal->loc, offset,
+ size);
+}
+
+int32_t
+ec_heal_xattr_clean(dict_t *dict, char *key, data_t *data, void *arg)
+{
+ dict_t *base = arg;
+
+ if (ec_ignorable_key_match(NULL, key, NULL, NULL)) {
+ dict_del(dict, key);
+ return 0;
+ }
+
+ if (dict_get(base, key) != NULL)
+ dict_del(dict, key);
return 0;
}
-int32_t ec_heal_setattr_cbk(call_frame_t * frame, void * cookie,
- xlator_t * this, int32_t op_ret, int32_t op_errno,
- struct iatt * preop_stbuf,
- struct iatt * postop_stbuf,
- dict_t * xdata)
+/********************************************************************
+ * ec_wind_xattrop_parallel:
+ * Helper function to update the extended attributes
+ * in parallel.
+ *
+ *******************************************************************/
+void
+ec_wind_xattrop_parallel(call_frame_t *frame, xlator_t *subvol, int child_index,
+ loc_t *loc, gf_xattrop_flags_t flags, dict_t **dict,
+ dict_t *xdata)
{
+ gf_msg_debug("EC", 0, "WIND: on child %d ", child_index);
+ STACK_WIND_COOKIE(
+ frame, cluster_xattrop_cbk, (void *)(uintptr_t)child_index, subvol,
+ subvol->fops->xattrop, loc, flags, dict[child_index], xdata);
+}
+
+int32_t
+ec_heal_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ ec_fop_data_t *fop = cookie;
+ ec_heal_t *heal = fop->data;
+
+ ec_trace("WRITE_CBK", cookie, "ret=%d, errno=%d", op_ret, op_errno);
+
+ gf_msg_debug(fop->xl->name, 0,
+ "%s: write op_ret %d, op_errno %s"
+ " at %" PRIu64,
+ uuid_utoa(heal->fd->inode->gfid), op_ret, strerror(op_errno),
+ heal->offset);
+
ec_heal_update(cookie, 0);
return 0;
}
-int32_t ec_heal_setxattr_cbk(call_frame_t * frame, void * cookie,
- xlator_t * this, int32_t op_ret, int32_t op_errno,
- dict_t * xdata)
+int32_t
+ec_heal_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iovec *vector,
+ int32_t count, struct iatt *stbuf, struct iobref *iobref,
+ dict_t *xdata)
{
- ec_heal_update(cookie, 0);
+ ec_fop_data_t *fop = cookie;
+ ec_heal_t *heal = fop->data;
+
+ ec_trace("READ_CBK", fop, "ret=%d, errno=%d", op_ret, op_errno);
+
+ ec_heal_avoid(fop);
+
+ if (op_ret > 0) {
+ gf_msg_debug(fop->xl->name, 0,
+ "%s: read succeeded, proceeding "
+ "to write at %" PRIu64,
+ uuid_utoa(heal->fd->inode->gfid), heal->offset);
+ ec_writev(heal->fop->frame, heal->xl, heal->bad, EC_MINIMUM_ONE,
+ ec_heal_writev_cbk, heal, heal->fd, vector, count,
+ heal->offset, 0, iobref, NULL);
+ } else {
+ if (op_ret < 0) {
+ gf_msg_debug(fop->xl->name, 0,
+ "%s: read failed %s, failing "
+ "to heal block at %" PRIu64,
+ uuid_utoa(heal->fd->inode->gfid), strerror(op_errno),
+ heal->offset);
+ heal->bad = 0;
+ }
+ heal->done = 1;
+ }
return 0;
}
-int32_t ec_heal_removexattr_cbk(call_frame_t * frame, void * cookie,
- xlator_t * this, int32_t op_ret,
- int32_t op_errno, dict_t * xdata)
+void
+ec_heal_data_block(ec_heal_t *heal)
{
- ec_heal_update(cookie, 0);
+ ec_trace("DATA", heal->fop, "good=%lX, bad=%lX", heal->good, heal->bad);
- return 0;
+ if ((heal->good != 0) && (heal->bad != 0) &&
+ (heal->iatt.ia_type == IA_IFREG)) {
+ ec_readv(heal->fop->frame, heal->xl, heal->good, EC_MINIMUM_MIN,
+ ec_heal_readv_cbk, heal, heal->fd, heal->size, heal->offset, 0,
+ NULL);
+ }
}
-int32_t ec_heal_target_open_cbk(call_frame_t * frame, void * cookie,
- xlator_t * this, int32_t op_ret,
- int32_t op_errno, fd_t * fd, dict_t * xdata)
+/* FOP: fheal */
+
+void
+ec_fheal(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fheal_cbk_t func, void *data, fd_t *fd,
+ int32_t partial, dict_t *xdata)
{
- ec_heal_update(cookie, 1);
+ ec_fd_t *ctx = ec_fd_get(fd, this);
- return 0;
+ if (ctx != NULL) {
+ gf_msg_trace("ec", 0, "FHEAL ctx: flags=%X, open=%" PRIXPTR, ctx->flags,
+ ctx->open);
+ ec_heal(frame, this, target, fop_flags, func, data, &ctx->loc, partial,
+ xdata);
+ }
}
-int32_t ec_heal_source_open_cbk(call_frame_t * frame, void * cookie,
- xlator_t * this, int32_t op_ret,
- int32_t op_errno, fd_t * fd, dict_t * xdata)
+/* Common heal code */
+void
+ec_mask_to_char_array(uintptr_t mask, unsigned char *array, int numsubvols)
{
- ec_heal_avoid(cookie);
+ int i = 0;
- return 0;
+ for (i = 0; i < numsubvols; i++)
+ array[i] = ((mask >> i) & 1);
}
-int32_t ec_heal_reopen_cbk(call_frame_t * frame, void * cookie,
- xlator_t * this, int32_t op_ret, int32_t op_errno,
- fd_t * fd, dict_t * xdata)
+uintptr_t
+ec_char_array_to_mask(unsigned char *array, int numsubvols)
{
- ec_fop_data_t * fop = cookie;
- ec_fd_t * ctx;
- uintptr_t good;
+ int i = 0;
+ uintptr_t mask = 0;
- ec_heal_check(fop, &good);
+ if (array == NULL)
+ goto out;
- if (good != 0)
- {
- LOCK(&fd->lock);
+ for (i = 0; i < numsubvols; i++)
+ if (array[i])
+ mask |= (1ULL << i);
+out:
+ return mask;
+}
+
+int
+ec_heal_entry_find_direction(ec_t *ec, default_args_cbk_t *replies,
+ uint64_t *versions, uint64_t *dirty,
+ unsigned char *sources,
+ unsigned char *healed_sinks)
+{
+ uint64_t xattr[EC_VERSION_SIZE] = {0};
+ int source = -1;
+ uint64_t max_version = 0;
+ int ret = 0;
+ int i = 0;
+
+ for (i = 0; i < ec->nodes; i++) {
+ if (!replies[i].valid)
+ continue;
- ctx = __ec_fd_get(fd, fop->xl);
- if (ctx != NULL) {
- ctx->bad &= ~good;
- ctx->open |= good;
+ if (replies[i].op_ret == -1)
+ continue;
+
+ if (source == -1)
+ source = i;
+
+ ret = ec_dict_get_array(replies[i].xdata, EC_XATTR_VERSION, xattr,
+ EC_VERSION_SIZE);
+ if (ret == 0) {
+ versions[i] = xattr[EC_DATA_TXN];
+ if (max_version < versions[i]) {
+ max_version = versions[i];
+ source = i;
+ }
}
- UNLOCK(&fd->lock);
+ memset(xattr, 0, sizeof(xattr));
+ ret = ec_dict_get_array(replies[i].xdata, EC_XATTR_DIRTY, xattr,
+ EC_VERSION_SIZE);
+ if (ret == 0) {
+ dirty[i] = xattr[EC_DATA_TXN];
+ }
}
- return 0;
-}
+ if (source < 0)
+ goto out;
-int32_t ec_heal_create (ec_heal_t *heal, uintptr_t mask)
-{
- dict_t * xdata;
- int error = 0;
+ for (i = 0; i < ec->nodes; i++) {
+ if (!replies[i].valid)
+ continue;
- xdata = dict_new();
- if (xdata == NULL)
- return ENOMEM;
+ if (replies[i].op_ret == -1)
+ continue;
- if (dict_set_static_bin(xdata, "gfid-req", heal->iatt.ia_gfid,
- sizeof(uuid_t))) {
- error = ENOMEM;
- goto out;
+ if (versions[i] == versions[source])
+ sources[i] = 1;
+ else
+ healed_sinks[i] = 1;
}
- switch (heal->iatt.ia_type)
- {
- case IA_IFDIR:
- ec_mkdir(heal->fop->frame, heal->xl, mask, EC_MINIMUM_ONE,
- ec_heal_mkdir_cbk, heal, &heal->loc,
- st_mode_from_ia(heal->iatt.ia_prot, heal->iatt.ia_type),
- 0, xdata);
+out:
+ return source;
+}
- break;
+int
+ec_adjust_versions(call_frame_t *frame, ec_t *ec, ec_txn_t type, inode_t *inode,
+ int source, unsigned char *sources,
+ unsigned char *healed_sinks, uint64_t *versions,
+ uint64_t *dirty)
+{
+ int i = 0;
+ int ret = 0;
+ int call_count = 0;
+ dict_t **xattr = NULL;
+ int op_ret = 0;
+ loc_t loc = {0};
+ gf_boolean_t erase_dirty = _gf_false;
+ uint64_t *versions_xattr = NULL;
+ uint64_t *dirty_xattr = NULL;
+ uint64_t allzero[2] = {0};
+ unsigned char *on = NULL;
+ unsigned char *output = NULL;
+ default_args_cbk_t *replies = NULL;
+
+ /* Allocate the required memory */
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+ on = alloca0(ec->nodes);
+ output = alloca0(ec->nodes);
+ EC_REPLIES_ALLOC(replies, ec->nodes);
+ xattr = GF_CALLOC(ec->nodes, sizeof(*xattr), gf_common_mt_pointer);
+ if (!xattr) {
+ op_ret = -ENOMEM;
+ goto out;
+ }
+ for (i = 0; i < ec->nodes; i++) {
+ xattr[i] = dict_new();
+ if (!xattr[i]) {
+ op_ret = -ENOMEM;
+ goto out;
+ }
+ }
- case IA_IFLNK:
- ec_symlink(heal->fop->frame, heal->xl, mask, EC_MINIMUM_ONE,
- ec_heal_symlink_cbk, heal, heal->symlink, &heal->loc,
- 0, xdata);
+ /* dirty xattr represents if the file/dir needs heal. Unless all the
+ * copies are healed, don't erase it */
+ if (EC_COUNT(sources, ec->nodes) + EC_COUNT(healed_sinks, ec->nodes) ==
+ ec->nodes)
+ erase_dirty = _gf_true;
+ else
+ op_ret = -ENOTCONN;
- break;
+ /* Populate the xattr array */
+ for (i = 0; i < ec->nodes; i++) {
+ if (!sources[i] && !healed_sinks[i])
+ continue;
+ versions_xattr = GF_CALLOC(EC_VERSION_SIZE, sizeof(*versions_xattr),
+ gf_common_mt_pointer);
+ if (!versions_xattr) {
+ op_ret = -ENOMEM;
+ continue;
+ }
- default:
- /* If mknod comes with the presence of GLUSTERFS_INTERNAL_FOP_KEY
- * then posix_mknod checks if there are already any gfid-links and
- * does link() instead of mknod. There still can be a race where
- * two posix_mknods with same gfid see that gfid-link file is not
- * present and proceeds with mknods and result in two different
- * files with same gfid. which is yet to be fixed in posix.*/
- if (dict_set_int32 (xdata, GLUSTERFS_INTERNAL_FOP_KEY, 1)) {
- error = ENOMEM;
- goto out;
+ versions_xattr[type] = hton64(versions[source] - versions[i]);
+ ret = dict_set_bin(xattr[i], EC_XATTR_VERSION, versions_xattr,
+ (sizeof(*versions_xattr) * EC_VERSION_SIZE));
+ if (ret < 0) {
+ op_ret = -ENOMEM;
+ continue;
+ }
+
+ if (erase_dirty) {
+ dirty_xattr = GF_CALLOC(EC_VERSION_SIZE, sizeof(*dirty_xattr),
+ gf_common_mt_pointer);
+ if (!dirty_xattr) {
+ op_ret = -ENOMEM;
+ continue;
}
- ec_mknod(heal->fop->frame, heal->xl, mask, EC_MINIMUM_ONE,
- ec_heal_mknod_cbk, heal, &heal->loc,
- st_mode_from_ia(heal->iatt.ia_prot, heal->iatt.ia_type),
- heal->iatt.ia_rdev, 0, xdata);
+ dirty_xattr[type] = hton64(-dirty[i]);
+ ret = dict_set_bin(xattr[i], EC_XATTR_DIRTY, dirty_xattr,
+ (sizeof(*dirty_xattr) * EC_VERSION_SIZE));
+ if (ret < 0) {
+ op_ret = -ENOMEM;
+ continue;
+ }
+ }
- break;
- }
- error = 0;
+ if (memcmp(versions_xattr, allzero,
+ (sizeof(*versions_xattr) * EC_VERSION_SIZE)) == 0) {
+ if (!erase_dirty) {
+ continue;
+ }
-out:
- if (xdata)
- dict_unref(xdata);
+ if (memcmp(dirty_xattr, allzero,
+ (sizeof(*dirty_xattr) * EC_VERSION_SIZE)) == 0) {
+ continue;
+ }
+ }
- return error;
-}
+ on[i] = 1;
+ call_count++;
+ }
-int32_t ec_heal_parent_cbk(call_frame_t *frame, void *cookie, xlator_t *xl,
- int32_t op_ret, int32_t op_errno, uintptr_t mask,
- uintptr_t good, uintptr_t bad, dict_t *xdata)
-{
- ec_fop_data_t *fop = cookie;
- ec_heal_t *heal = fop->data;
+ /* Update the bricks with xattr */
+ if (call_count) {
+ PARALLEL_FOP_ONLIST(ec->xl_list, on, ec->nodes, replies, frame,
+ ec_wind_xattrop_parallel, &loc,
+ GF_XATTROP_ADD_ARRAY64, xattr, NULL);
+ ret = cluster_fop_success_fill(replies, ec->nodes, output);
+ }
- /* Even if parent self-heal has failed, we try to heal the current entry */
- ec_heal_create(heal, fop->mask);
+ if (ret < call_count) {
+ op_ret = -ENOTCONN;
+ goto out;
+ }
- return 0;
+out:
+ /* Cleanup */
+ if (xattr) {
+ for (i = 0; i < ec->nodes; i++) {
+ if (xattr[i])
+ dict_unref(xattr[i]);
+ }
+ GF_FREE(xattr);
+ }
+ cluster_replies_wipe(replies, ec->nodes);
+ loc_wipe(&loc);
+ return op_ret;
}
-void ec_heal_parent(ec_heal_t *heal, uintptr_t mask)
+int
+ec_heal_metadata_find_direction(ec_t *ec, default_args_cbk_t *replies,
+ uint64_t *versions, uint64_t *dirty,
+ unsigned char *sources,
+ unsigned char *healed_sinks)
{
- loc_t parent;
- int32_t healing = 0;
+ uint64_t xattr[EC_VERSION_SIZE] = {0};
+ uint64_t max_version = 0;
+ int same_count = 0;
+ int max_same_count = 0;
+ int same_source = -1;
+ int ret = 0;
+ int i = 0;
+ int j = 0;
+ int *groups = NULL;
+ struct iatt source_ia = {0};
+ struct iatt child_ia = {0};
+
+ groups = alloca0(ec->nodes * sizeof(*groups));
+ for (i = 0; i < ec->nodes; i++)
+ groups[i] = -1;
+
+ for (i = 0; i < ec->nodes; i++) {
+ if (!replies[i].valid)
+ continue;
+ if (replies[i].op_ret < 0)
+ continue;
+ ret = ec_dict_get_array(replies[i].xdata, EC_XATTR_VERSION, xattr,
+ EC_VERSION_SIZE);
+ if (ret == 0) {
+ versions[i] = xattr[EC_METADATA_TXN];
+ }
- /* First we try to do a partial heal of the parent directory to avoid
- * ENOENT/ENOTDIR errors caused by missing parents */
- if (ec_loc_parent(heal->xl, &heal->loc, &parent) == 0) {
- if (!__is_root_gfid(parent.gfid)) {
- ec_heal(heal->fop->frame, heal->xl, mask, EC_MINIMUM_ONE,
- ec_heal_parent_cbk, heal, &parent, 1, NULL);
+ memset(xattr, 0, sizeof(xattr));
+ ret = ec_dict_get_array(replies[i].xdata, EC_XATTR_DIRTY, xattr,
+ EC_VERSION_SIZE);
+ if (ret == 0) {
+ dirty[i] = xattr[EC_METADATA_TXN];
+ }
+ if (groups[i] >= 0) /*Already part of group*/
+ continue;
+ groups[i] = i;
+ same_count = 1;
+ source_ia = replies[i].stat;
+ for (j = i + 1; j < ec->nodes; j++) {
+ if (!replies[j].valid || replies[j].op_ret < 0)
+ continue;
+ child_ia = replies[j].stat;
+ if (!IA_EQUAL(source_ia, child_ia, gfid) ||
+ !IA_EQUAL(source_ia, child_ia, type) ||
+ !IA_EQUAL(source_ia, child_ia, prot) ||
+ !IA_EQUAL(source_ia, child_ia, uid) ||
+ !IA_EQUAL(source_ia, child_ia, gid))
+ continue;
+ if (!are_dicts_equal(replies[i].xdata, replies[j].xdata,
+ ec_sh_key_match, NULL))
+ continue;
+ groups[j] = i;
+ same_count++;
+ }
- healing = 1;
+ if (max_same_count < same_count) {
+ max_same_count = same_count;
+ same_source = i;
}
- loc_wipe(&parent);
}
- if (!healing) {
- ec_heal_create(heal, mask);
+ if (max_same_count < ec->fragments) {
+ ret = -EIO;
+ goto out;
}
+
+ for (i = 0; i < ec->nodes; i++) {
+ if (groups[i] == groups[same_source])
+ sources[i] = 1;
+ else if (replies[i].valid && replies[i].op_ret >= 0)
+ healed_sinks[i] = 1;
+ }
+ for (i = 0; i < ec->nodes; i++) {
+ if (sources[i] && (versions[i] > max_version)) {
+ same_source = i;
+ max_version = versions[i];
+ }
+ }
+ ret = same_source;
+out:
+ return ret;
}
-void ec_heal_recreate(ec_fop_data_t * fop)
+int
+__ec_heal_metadata_prepare(call_frame_t *frame, ec_t *ec, inode_t *inode,
+ unsigned char *locked_on,
+ default_args_cbk_t *replies, uint64_t *versions,
+ uint64_t *dirty, unsigned char *sources,
+ unsigned char *healed_sinks)
{
- ec_cbk_data_t * cbk;
- ec_heal_t * heal = fop->data;
- uintptr_t mask = 0;
-
- if (heal->iatt.ia_type == IA_INVAL)
- {
- return;
+ loc_t loc = {0};
+ unsigned char *output = NULL;
+ unsigned char *lookup_on = NULL;
+ int ret = 0;
+ int source = 0;
+ default_args_cbk_t *greplies = NULL;
+ int i = 0;
+ EC_REPLIES_ALLOC(greplies, ec->nodes);
+
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+ output = alloca0(ec->nodes);
+ lookup_on = alloca0(ec->nodes);
+ ret = cluster_lookup(ec->xl_list, locked_on, ec->nodes, replies, output,
+ frame, ec->xl, &loc, NULL);
+ if (ret <= ec->fragments) {
+ ret = -ENOTCONN;
+ goto out;
}
- list_for_each_entry(cbk, &fop->cbk_list, list)
- {
- if ((cbk->op_ret >= 0) || (cbk->op_errno == ENOENT) ||
- (cbk->op_errno == ENOTDIR))
- {
- mask |= cbk->mask;
+ memcpy(lookup_on, output, ec->nodes);
+ /*Use getxattr to get the filtered xattrs which filter internal xattrs*/
+ ret = cluster_getxattr(ec->xl_list, lookup_on, ec->nodes, greplies, output,
+ frame, ec->xl, &loc, NULL, NULL);
+ for (i = 0; i < ec->nodes; i++) {
+ if (lookup_on[i] && !output[i]) {
+ replies[i].valid = 0;
+ continue;
+ }
+ if (replies[i].xdata) {
+ dict_unref(replies[i].xdata);
+ replies[i].xdata = NULL;
+ if (greplies[i].xattr)
+ replies[i].xdata = dict_ref(greplies[i].xattr);
}
}
- if (mask != 0)
- {
- ec_heal_parent(heal, mask);
+ source = ec_heal_metadata_find_direction(ec, replies, versions, dirty,
+ sources, healed_sinks);
+ if (source < 0) {
+ ret = -EIO;
+ goto out;
}
+ ret = source;
+out:
+ cluster_replies_wipe(greplies, ec->nodes);
+ loc_wipe(&loc);
+ return ret;
}
-int32_t ec_heal_rmdir_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
- int32_t op_ret, int32_t op_errno,
- struct iatt * preparent, struct iatt * postparent,
- dict_t * xdata)
+/* Metadata heal */
+int
+__ec_removexattr_sinks(call_frame_t *frame, ec_t *ec, inode_t *inode,
+ int source, unsigned char *sources,
+ unsigned char *healed_sinks, default_args_cbk_t *replies)
{
- ec_heal_update(cookie, 0);
- ec_heal_recreate(cookie);
+ int i = 0;
+ int ret = 0;
+ loc_t loc = {0};
+
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+
+ for (i = 0; i < ec->nodes; i++) {
+ if (i == source)
+ continue;
+ if (!sources[i] && !healed_sinks[i])
+ continue;
+ ret = dict_foreach(replies[i].xdata, ec_heal_xattr_clean,
+ replies[source].xdata);
+ if (ret < 0) {
+ sources[i] = 0;
+ healed_sinks[i] = 0;
+ continue;
+ }
+
+ if (replies[i].xdata->count == 0) {
+ continue;
+ } else if (sources[i]) {
+ /* This can happen if setxattr/removexattr succeeds on
+ * the bricks but fails to update the version. This
+ * will make sure that the xattrs are made equal after
+ * heal*/
+ sources[i] = 0;
+ healed_sinks[i] = 1;
+ }
+ ret = syncop_removexattr(ec->xl_list[i], &loc, "", replies[i].xdata,
+ NULL);
+ if (ret < 0)
+ healed_sinks[i] = 0;
+ }
+
+ loc_wipe(&loc);
+ if (EC_COUNT(healed_sinks, ec->nodes) == 0)
+ return -ENOTCONN;
return 0;
}
-int32_t ec_heal_unlink_cbk(call_frame_t * frame, void * cookie,
- xlator_t * this, int32_t op_ret, int32_t op_errno,
- struct iatt * preparent, struct iatt * postparent,
- dict_t * xdata)
+int
+__ec_heal_metadata(call_frame_t *frame, ec_t *ec, inode_t *inode,
+ unsigned char *locked_on, unsigned char *sources,
+ unsigned char *healed_sinks)
{
- ec_heal_update(cookie, 0);
- ec_heal_recreate(cookie);
+ loc_t loc = {0};
+ int ret = 0;
+ int source = 0;
+ default_args_cbk_t *replies = NULL;
+ default_args_cbk_t *sreplies = NULL;
+ uint64_t *versions = NULL;
+ uint64_t *dirty = NULL;
+ unsigned char *output = NULL;
+ dict_t *source_dict = NULL;
+ struct iatt source_buf = {0};
+
+ EC_REPLIES_ALLOC(replies, ec->nodes);
+ EC_REPLIES_ALLOC(sreplies, ec->nodes);
+
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+ output = alloca0(ec->nodes);
+ versions = alloca0(ec->nodes * sizeof(*versions));
+ dirty = alloca0(ec->nodes * sizeof(*dirty));
+ source = __ec_heal_metadata_prepare(frame, ec, inode, locked_on, replies,
+ versions, dirty, sources, healed_sinks);
+ if (source < 0) {
+ ret = -EIO;
+ goto out;
+ }
- return 0;
+ if ((EC_COUNT(sources, ec->nodes) == ec->nodes) ||
+ (EC_COUNT(healed_sinks, ec->nodes) == 0)) {
+ ret = 0;
+ goto erase_dirty;
+ }
+
+ source_buf = replies[source].stat;
+ ret = cluster_setattr(ec->xl_list, healed_sinks, ec->nodes, sreplies,
+ output, frame, ec->xl, &loc, &source_buf,
+ GF_SET_ATTR_MODE | GF_SET_ATTR_UID | GF_SET_ATTR_GID,
+ NULL);
+ /*In case the operation fails on some of the subvols*/
+ memcpy(healed_sinks, output, ec->nodes);
+ if (EC_COUNT(healed_sinks, ec->nodes) == 0) {
+ ret = -ENOTCONN;
+ goto out;
+ }
+
+ ret = __ec_removexattr_sinks(frame, ec, inode, source, sources,
+ healed_sinks, replies);
+ if (ret < 0)
+ goto out;
+
+ source_dict = dict_ref(replies[source].xdata);
+ if (dict_foreach_match(source_dict, ec_ignorable_key_match, NULL,
+ dict_remove_foreach_fn, NULL) == -1) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = cluster_setxattr(ec->xl_list, healed_sinks, ec->nodes, replies,
+ output, frame, ec->xl, &loc, source_dict, 0, NULL);
+
+ EC_INTERSECT(healed_sinks, healed_sinks, output, ec->nodes);
+ if (EC_COUNT(healed_sinks, ec->nodes) == 0) {
+ ret = -ENOTCONN;
+ goto out;
+ }
+
+erase_dirty:
+ ret = ec_adjust_versions(frame, ec, EC_METADATA_TXN, inode, source, sources,
+ healed_sinks, versions, dirty);
+out:
+ if (source_dict)
+ dict_unref(source_dict);
+
+ loc_wipe(&loc);
+ cluster_replies_wipe(replies, ec->nodes);
+ cluster_replies_wipe(sreplies, ec->nodes);
+ return ret;
}
-int32_t
-ec_heal_init (ec_fop_data_t * fop)
+int
+ec_heal_metadata(call_frame_t *frame, ec_t *ec, inode_t *inode,
+ unsigned char *sources, unsigned char *healed_sinks)
{
- ec_t * ec = fop->xl->private;
- struct iobuf_pool * pool;
- inode_t * inode;
- ec_inode_t * ctx;
- ec_heal_t * heal = NULL;
- int32_t error = 0;
-
- inode = fop->loc[0].inode;
- if (inode == NULL)
+ unsigned char *locked_on = NULL;
+ unsigned char *up_subvols = NULL;
+ unsigned char *output = NULL;
+ int ret = 0;
+ default_args_cbk_t *replies = NULL;
+
+ EC_REPLIES_ALLOC(replies, ec->nodes);
+ locked_on = alloca0(ec->nodes);
+ output = alloca0(ec->nodes);
+ up_subvols = alloca0(ec->nodes);
+ ec_mask_to_char_array(ec->xl_up, up_subvols, ec->nodes);
+ ret = cluster_inodelk(ec->xl_list, up_subvols, ec->nodes, replies,
+ locked_on, frame, ec->xl, ec->xl->name, inode, 0, 0);
{
- gf_log(fop->xl->name, GF_LOG_WARNING, "Unable to start inode healing "
- "because there is not enough "
- "information");
+ if (ret <= ec->fragments) {
+ gf_msg_debug(ec->xl->name, 0,
+ "%s: Skipping heal "
+ "as only %d number of subvolumes could "
+ "be locked",
+ uuid_utoa(inode->gfid), ret);
+ ret = -ENOTCONN;
+ goto unlock;
+ }
+ ret = __ec_heal_metadata(frame, ec, inode, locked_on, sources,
+ healed_sinks);
+ }
+unlock:
+ cluster_uninodelk(ec->xl_list, locked_on, ec->nodes, replies, output, frame,
+ ec->xl, ec->xl->name, inode, 0, 0);
+ cluster_replies_wipe(replies, ec->nodes);
+ return ret;
+}
+
+/*entry heal*/
+int
+__ec_heal_entry_prepare(call_frame_t *frame, ec_t *ec, inode_t *inode,
+ unsigned char *locked_on, uint64_t *versions,
+ uint64_t *dirty, unsigned char *sources,
+ unsigned char *healed_sinks)
+{
+ loc_t loc = {0};
+ int source = 0;
+ int ret = 0;
+ default_args_cbk_t *replies = NULL;
+ unsigned char *output = NULL;
+ dict_t *xdata = NULL;
+
+ EC_REPLIES_ALLOC(replies, ec->nodes);
- return ENODATA;
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+ xdata = dict_new();
+ if (!xdata) {
+ ret = -ENOMEM;
+ goto out;
}
- heal = GF_MALLOC(sizeof(ec_heal_t), ec_mt_ec_heal_t);
- if (heal == NULL)
- {
- return ENOMEM;
+ if (dict_set_uint64(xdata, EC_XATTR_VERSION, 0) ||
+ dict_set_uint64(xdata, EC_XATTR_DIRTY, 0)) {
+ ret = -ENOMEM;
+ goto out;
}
- memset(heal, 0, sizeof(ec_heal_t));
+ output = alloca0(ec->nodes);
+ ret = cluster_lookup(ec->xl_list, locked_on, ec->nodes, replies, output,
+ frame, ec->xl, &loc, xdata);
+ if (ret <= ec->fragments) {
+ ret = -ENOTCONN;
+ goto out;
+ }
- if (ec_loc_from_loc(fop->xl, &heal->loc, &fop->loc[0]) != 0) {
- error = ENOMEM;
+ source = ec_heal_entry_find_direction(ec, replies, versions, dirty, sources,
+ healed_sinks);
+ if (source < 0) {
+ ret = -EIO;
+ goto out;
+ }
+ ret = source;
+out:
+ if (xdata)
+ dict_unref(xdata);
+ loc_wipe(&loc);
+ cluster_replies_wipe(replies, ec->nodes);
+ return ret;
+}
+int32_t
+ec_set_new_entry_dirty(ec_t *ec, loc_t *loc, struct iatt *ia,
+ call_frame_t *frame, xlator_t *this, unsigned char *on)
+{
+ dict_t *xattr = NULL;
+ int32_t ret = -1;
+ default_args_cbk_t *replies = NULL;
+ unsigned char *output = NULL;
+ uint64_t dirty[EC_VERSION_SIZE] = {1, 1};
+ loc_t newloc = {0};
+
+ /*Symlinks don't have any data to be healed*/
+ if (ia->ia_type == IA_IFLNK)
+ dirty[EC_DATA_TXN] = 0;
+
+ newloc.inode = inode_ref(loc->inode);
+ gf_uuid_copy(newloc.gfid, ia->ia_gfid);
+ EC_REPLIES_ALLOC(replies, ec->nodes);
+ output = alloca0(ec->nodes);
+ xattr = dict_new();
+ if (!xattr) {
+ ret = -ENOMEM;
goto out;
}
- LOCK_INIT(&heal->lock);
+ ret = ec_dict_set_array(xattr, EC_XATTR_DIRTY, dirty, EC_VERSION_SIZE);
+ if (ret)
+ goto out;
- heal->xl = fop->xl;
- heal->fop = fop;
- pool = fop->xl->ctx->iobuf_pool;
- heal->size = iobpool_default_pagesize(pool) * ec->fragments;
- heal->partial = fop->int32;
- fop->heal = heal;
+ ret = cluster_xattrop(ec->xl_list, on, ec->nodes, replies, output, frame,
+ ec->xl, &newloc, GF_XATTROP_ADD_ARRAY64, xattr, NULL);
- LOCK(&inode->lock);
+ if (ret < ec->fragments) {
+ ret = -ENOTCONN;
+ goto out;
+ }
- ctx = __ec_inode_get(inode, fop->xl);
- if (ctx == NULL)
- {
- error = EIO;
+out:
+ if (xattr)
+ dict_unref(xattr);
+ cluster_replies_wipe(replies, ec->nodes);
+ loc_wipe(&newloc);
+ return ret;
+}
- goto unlock;
+/*Name heal*/
+int
+ec_delete_stale_name(dict_t *gfid_db, char *key, data_t *d, void *data)
+{
+ struct ec_name_data *name_data = data;
+ struct iatt *ia = NULL;
+ ec_t *ec = NULL;
+ loc_t loc = {0};
+ unsigned char *same = data_to_bin(d);
+ default_args_cbk_t *replies = NULL;
+ unsigned char *output = NULL;
+ int ret = 0;
+ int estale_count = 0;
+ int i = 0;
+ call_frame_t *frame = name_data->frame;
+ uuid_t gfid;
+
+ ec = name_data->frame->this->private;
+ EC_REPLIES_ALLOC(replies, ec->nodes);
+ if (EC_COUNT(same, ec->nodes) >= ec->fragments) {
+ ret = 0;
+ goto out;
}
- if (list_empty(&ctx->heal)) {
- gf_log("ec", GF_LOG_INFO, "Healing '%s', gfid %s", heal->loc.path,
- uuid_utoa(heal->loc.gfid));
- } else {
- error = EEXIST;
+ loc.parent = inode_ref(name_data->parent);
+ loc.inode = inode_new(name_data->parent->table);
+ if (!loc.inode) {
+ ret = -ENOMEM;
+ goto out;
}
- list_add_tail(&heal->list, &ctx->heal);
- heal = NULL;
+ gf_uuid_parse(key, gfid);
+ gf_uuid_copy(loc.pargfid, name_data->parent->gfid);
+ loc.name = name_data->name;
+ output = alloca0(ec->nodes);
+ ret = cluster_lookup(ec->xl_list, name_data->participants, ec->nodes,
+ replies, output, name_data->frame, ec->xl, &loc, NULL);
-unlock:
- UNLOCK(&inode->lock);
+ for (i = 0; i < ec->nodes; i++) {
+ if (!replies[i].valid)
+ continue;
+ if (replies[i].op_ret == -1) {
+ if (replies[i].op_errno == ESTALE || replies[i].op_errno == ENOENT)
+ estale_count++;
+ else
+ name_data->participants[i] = 0;
+ } else if (gf_uuid_compare(gfid, replies[i].stat.ia_gfid)) {
+ estale_count++;
+ gf_msg_debug(ec->xl->name, 0, "%s/%s: different gfid as %s",
+ uuid_utoa(name_data->parent->gfid), name_data->name,
+ key);
+ }
+ }
- if (error == EEXIST) {
- LOCK(&fop->lock);
+ if (estale_count <= ec->redundancy) {
+ /* We have at least ec->fragments number of fragments, so the
+ * file is recoverable, so don't delete it*/
- fop->jobs++;
- fop->refs++;
+ /* Please note that the lookup call above could fail with
+ * ENOTCONN on all subvoumes and still this branch will be
+ * true, but in those cases conservatively we decide to not
+ * delete the file until we are sure*/
+ ret = 0;
+ goto out;
+ }
- UNLOCK(&fop->lock);
+ /*Noway to recover, delete the name*/
+ loc_wipe(&loc);
+ loc.parent = inode_ref(name_data->parent);
+ gf_uuid_copy(loc.pargfid, loc.parent->gfid);
+ loc.name = name_data->name;
+ for (i = 0; i < ec->nodes; i++) {
+ if (same[i] && replies[i].valid && (replies[i].op_ret == 0)) {
+ ia = &replies[i].stat;
+ break;
+ }
+ }
- error = 0;
+ if (!ia) {
+ ret = -ENOTCONN;
+ goto out;
}
+ if (IA_ISDIR(ia->ia_type)) {
+ ret = cluster_rmdir(ec->xl_list, same, ec->nodes, replies, output,
+ frame, ec->xl, &loc, 1, NULL);
+ gf_msg_debug(ec->xl->name, 0,
+ "cluster rmdir succeeded on %d "
+ "nodes",
+ ret);
+ } else {
+ ret = cluster_unlink(ec->xl_list, same, ec->nodes, replies, output,
+ frame, ec->xl, &loc, 0, NULL);
+ gf_msg_debug(ec->xl->name, 0,
+ "cluster unlink succeeded on %d "
+ "nodes",
+ ret);
+ }
+
+ for (i = 0; i < ec->nodes; i++) {
+ if (output[i]) {
+ same[i] = 0;
+ name_data->enoent[i] = 1;
+ } else {
+ /*op failed*/
+ if (same[i])
+ name_data->participants[i] = 0;
+ }
+ }
+ ret = 0;
+ /*This will help in making decisions about creating names*/
+ dict_del(gfid_db, key);
out:
- GF_FREE(heal);
+ if (ret < 0) {
+ gf_msg_debug(ec->xl->name, 0, "%s/%s: heal failed %s",
+ uuid_utoa(name_data->parent->gfid), name_data->name,
+ strerror(-ret));
+ }
+ cluster_replies_wipe(replies, ec->nodes);
+ loc_wipe(&loc);
+ return ret;
+}
- return error;
+int
+ec_delete_stale_names(call_frame_t *frame, ec_t *ec, inode_t *parent,
+ char *name, default_args_cbk_t *replies, dict_t *gfid_db,
+ unsigned char *enoent, unsigned char *gfidless,
+ unsigned char *participants)
+{
+ struct ec_name_data name_data = {0};
+
+ name_data.enoent = enoent;
+ name_data.gfidless = gfidless;
+ name_data.participants = participants;
+ name_data.name = name;
+ name_data.parent = parent;
+ name_data.frame = frame;
+ name_data.replies = replies;
+ return dict_foreach(gfid_db, ec_delete_stale_name, &name_data);
}
-void ec_heal_entrylk(ec_heal_t * heal, entrylk_cmd cmd)
+int
+_assign_same(dict_t *dict, char *key, data_t *value, void *data)
{
- loc_t loc;
+ struct ec_name_data *name_data = data;
- if (ec_loc_parent(heal->xl, &heal->loc, &loc) != 0) {
- ec_fop_set_error(heal->fop, EIO);
+ name_data->same = data_to_bin(value);
+ return 0;
+}
- return;
+int
+ec_create_name(call_frame_t *frame, ec_t *ec, inode_t *parent, char *name,
+ default_args_cbk_t *lookup_replies, dict_t *gfid_db,
+ unsigned char *enoent, unsigned char *participants)
+{
+ int ret = 0;
+ int i = 0;
+ struct ec_name_data name_data = {0};
+ struct iatt *ia = NULL;
+ unsigned char *output = 0;
+ unsigned char *output1 = 0;
+ unsigned char *on = NULL;
+ default_args_cbk_t *replies = NULL;
+ loc_t loc = {0};
+ loc_t srcloc = {0};
+ unsigned char *link = NULL;
+ unsigned char *create = NULL;
+ dict_t *xdata = NULL;
+ char *linkname = NULL;
+ ec_config_t config;
+
+ /* There should be just one gfid key */
+ EC_REPLIES_ALLOC(replies, ec->nodes);
+ if (gfid_db->count != 1) {
+ ret = -EINVAL;
+ goto out;
}
- ec_entrylk(heal->fop->frame, heal->xl, -1, EC_MINIMUM_ALL, NULL, NULL,
- heal->xl->name, &loc, NULL, cmd, ENTRYLK_WRLCK, NULL);
+ ret = dict_foreach(gfid_db, _assign_same, &name_data);
+ if (ret < 0)
+ goto out;
+ /*There should at least be one valid success reply with gfid*/
+ for (i = 0; i < ec->nodes; i++)
+ if (name_data.same[i])
+ break;
- loc_wipe(&loc);
-}
+ if (i == ec->nodes) {
+ ret = -EINVAL;
+ goto out;
+ }
-void ec_heal_inodelk(ec_heal_t * heal, int32_t type, int32_t use_fd,
- off_t offset, size_t size)
-{
- struct gf_flock flock;
+ ia = &lookup_replies[i].stat;
+ xdata = dict_new();
+ loc.parent = inode_ref(parent);
+ gf_uuid_copy(loc.pargfid, parent->gfid);
+ loc.inode = inode_new(parent->table);
+ if (loc.inode)
+ srcloc.inode = inode_ref(loc.inode);
+ gf_uuid_copy(srcloc.gfid, ia->ia_gfid);
+ if (!loc.inode || !xdata ||
+ dict_set_static_bin(xdata, "gfid-req", ia->ia_gfid,
+ sizeof(ia->ia_gfid))) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ loc.name = name;
+ link = alloca0(ec->nodes);
+ create = alloca0(ec->nodes);
+ on = alloca0(ec->nodes);
+ output = alloca0(ec->nodes);
+ output1 = alloca0(ec->nodes);
- flock.l_type = type;
- flock.l_whence = SEEK_SET;
- flock.l_start = offset;
- flock.l_len = size;
- flock.l_pid = 0;
- flock.l_owner.len = 0;
+ for (i = 0; i < ec->nodes; i++) {
+ if (!lookup_replies[i].valid)
+ continue;
+ if (lookup_replies[i].op_ret)
+ continue;
+ on[i] = 1;
+ }
+ switch (ia->ia_type) {
+ case IA_IFDIR:
+ ec_set_new_entry_dirty(ec, &loc, ia, frame, ec->xl, on);
+ (void)cluster_mkdir(
+ ec->xl_list, enoent, ec->nodes, replies, output, frame, ec->xl,
+ &loc, st_mode_from_ia(ia->ia_prot, ia->ia_type), 0, xdata);
+ break;
- if (use_fd)
- {
- ec_finodelk(heal->fop->frame, heal->xl, heal->fop->mask,
- EC_MINIMUM_ALL, NULL, NULL, heal->xl->name, heal->fd,
- F_SETLKW, &flock, NULL);
+ case IA_IFLNK:
+ /*Check for hard links and create/link*/
+ ret = cluster_lookup(ec->xl_list, enoent, ec->nodes, replies,
+ output, frame, ec->xl, &srcloc, NULL);
+ for (i = 0; i < ec->nodes; i++) {
+ if (output[i]) {
+ link[i] = 1;
+ } else {
+ if (replies[i].op_errno == ENOENT ||
+ replies[i].op_errno == ESTALE) {
+ create[i] = 1;
+ }
+ }
+ }
+
+ if (EC_COUNT(link, ec->nodes)) {
+ cluster_link(ec->xl_list, link, ec->nodes, replies, output1,
+ frame, ec->xl, &srcloc, &loc, NULL);
+ }
+
+ if (EC_COUNT(create, ec->nodes)) {
+ cluster_readlink(ec->xl_list, name_data.same, ec->nodes,
+ replies, output, frame, ec->xl, &srcloc, 4096,
+ NULL);
+ if (EC_COUNT(output, ec->nodes) == 0) {
+ ret = -ENOTCONN;
+ goto out;
+ }
+
+ for (i = 0; i < ec->nodes; i++) {
+ if (output[i])
+ break;
+ }
+ linkname = alloca0(strlen(replies[i].buf) + 1);
+ strcpy(linkname, replies[i].buf);
+ ec_set_new_entry_dirty(ec, &loc, ia, frame, ec->xl, on);
+ cluster_symlink(ec->xl_list, create, ec->nodes, replies, output,
+ frame, ec->xl, linkname, &loc, 0, xdata);
+ }
+ for (i = 0; i < ec->nodes; i++)
+ if (output1[i])
+ output[i] = 1;
+ break;
+ case IA_IFREG:
+ ec_set_new_entry_dirty(ec, &loc, ia, frame, ec->xl, on);
+ config.version = EC_CONFIG_VERSION;
+ config.algorithm = EC_CONFIG_ALGORITHM;
+ config.gf_word_size = EC_GF_BITS;
+ config.bricks = ec->nodes;
+ config.redundancy = ec->redundancy;
+ config.chunk_size = EC_METHOD_CHUNK_SIZE;
+
+ ret = ec_dict_set_config(xdata, EC_XATTR_CONFIG, &config);
+ if (ret != 0) {
+ goto out;
+ }
+
+ /* Fall through */
+
+ default:
+ ret = dict_set_int32(xdata, GLUSTERFS_INTERNAL_FOP_KEY, 1);
+ if (ret)
+ goto out;
+ ret = cluster_mknod(
+ ec->xl_list, enoent, ec->nodes, replies, output, frame, ec->xl,
+ &loc, st_mode_from_ia(ia->ia_prot, ia->ia_type),
+ makedev(ia_major(ia->ia_rdev), ia_minor(ia->ia_rdev)), 0,
+ xdata);
+ break;
}
- else
- {
- ec_inodelk(heal->fop->frame, heal->xl, heal->fop->mask, EC_MINIMUM_ALL,
- NULL, NULL, heal->xl->name, &heal->loc, F_SETLKW, &flock,
- NULL);
+
+ for (i = 0; i < ec->nodes; i++) {
+ if (enoent[i] && !output[i])
+ participants[i] = 0;
}
+
+ ret = 0;
+out:
+ if (ret < 0)
+ gf_msg_debug(ec->xl->name, 0, "%s/%s: heal failed %s",
+ uuid_utoa(parent->gfid), name, strerror(-ret));
+ cluster_replies_wipe(replies, ec->nodes);
+ loc_wipe(&loc);
+ loc_wipe(&srcloc);
+ if (xdata)
+ dict_unref(xdata);
+ return ret;
}
-void ec_heal_lookup(ec_heal_t *heal, uintptr_t mask)
+int
+__ec_heal_name(call_frame_t *frame, ec_t *ec, inode_t *parent, char *name,
+ unsigned char *participants)
{
- dict_t * xdata;
- int32_t error = ENOMEM;
-
+ unsigned char *output = NULL;
+ unsigned char *enoent = NULL;
+ default_args_cbk_t *replies = NULL;
+ dict_t *xdata = NULL;
+ dict_t *gfid_db = NULL;
+ int ret = 0;
+ loc_t loc = {0};
+ int i = 0;
+ struct iatt *ia = NULL;
+ char gfid[64] = {0};
+ unsigned char *same = NULL;
+ unsigned char *gfidless = NULL;
+
+ EC_REPLIES_ALLOC(replies, ec->nodes);
+ loc.parent = inode_ref(parent);
+ loc.inode = inode_new(parent->table);
+ gf_uuid_copy(loc.pargfid, parent->gfid);
+ loc.name = name;
xdata = dict_new();
- if (xdata == NULL)
- {
+ gfid_db = dict_new();
+ if (!xdata || !gfid_db || !loc.inode) {
+ ret = -ENOMEM;
goto out;
}
- if (dict_set_uint64(xdata, "list-xattr", 0) != 0)
- {
+
+ ret = dict_set_int32(xdata, GF_GFIDLESS_LOOKUP, 1);
+ if (ret) {
+ ret = -ENOMEM;
goto out;
}
- ec_lookup(heal->fop->frame, heal->xl, mask, EC_MINIMUM_MIN,
- ec_heal_inode_lookup_cbk, heal, &heal->loc, xdata);
+ output = alloca0(ec->nodes);
+ gfidless = alloca0(ec->nodes);
+ enoent = alloca0(ec->nodes);
+ ret = cluster_lookup(ec->xl_list, participants, ec->nodes, replies, output,
+ frame, ec->xl, &loc, NULL);
+ for (i = 0; i < ec->nodes; i++) {
+ if (!replies[i].valid)
+ continue;
- error = 0;
+ if (replies[i].op_ret == -1) {
+ /*If ESTALE comes here, that means parent dir is not
+ * present, nothing to do there, so reset participants
+ * for that brick*/
+ if (replies[i].op_errno == ENOENT)
+ enoent[i] = 1;
+ else
+ participants[i] = 0;
+ continue;
+ }
+ ia = &replies[i].stat;
+ if (gf_uuid_is_null(ia->ia_gfid)) {
+ if (IA_ISDIR(ia->ia_type) || ia->ia_size == 0)
+ gfidless[i] = 1;
+ else
+ participants[i] = 0;
+ } else {
+ uuid_utoa_r(ia->ia_gfid, gfid);
+ ret = dict_get_bin(gfid_db, gfid, (void **)&same);
+ if (ret < 0) {
+ same = alloca0(ec->nodes);
+ }
+ same[i] = 1;
+ if (ret < 0) {
+ ret = dict_set_static_bin(gfid_db, gfid, same, ec->nodes);
+ }
+ if (ret < 0)
+ goto out;
+ }
+ }
-out:
- if (xdata != NULL)
- {
- dict_unref(xdata);
+ ret = ec_delete_stale_names(frame, ec, parent, name, replies, gfid_db,
+ enoent, gfidless, participants);
+
+ if (gfid_db->count == 0) {
+ /* All entries seem to be stale entries and deleted,
+ * nothing more to do.*/
+ goto out;
}
- ec_fop_set_error(heal->fop, error);
+ if (gfid_db->count > 1) {
+ gf_msg(ec->xl->name, GF_LOG_INFO, 0, EC_MSG_HEAL_FAIL,
+ "%s/%s: Not able to heal", uuid_utoa(parent->gfid), name);
+ memset(participants, 0, ec->nodes);
+ goto out;
+ }
+
+ EC_INTERSECT(enoent, enoent, participants, ec->nodes);
+ if (EC_COUNT(enoent, ec->nodes) == 0) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = ec_create_name(frame, ec, parent, name, replies, gfid_db, enoent,
+ participants);
+ if (ret >= 0) {
+ /* If ec_create_name() succeeded we return 1 to indicate that a new
+ * file has been created and it will need to be healed. */
+ ret = 1;
+ }
+out:
+ cluster_replies_wipe(replies, ec->nodes);
+ loc_wipe(&loc);
+ if (xdata)
+ dict_unref(xdata);
+ if (gfid_db)
+ dict_unref(gfid_db);
+ return ret;
}
-void ec_heal_remove(ec_heal_t * heal, ec_cbk_data_t * cbk)
+int
+ec_heal_name(call_frame_t *frame, ec_t *ec, inode_t *parent, char *name,
+ unsigned char *participants)
{
- if (cbk->iatt[0].ia_type == IA_IFDIR)
- {
- ec_rmdir(heal->fop->frame, heal->xl, cbk->mask, EC_MINIMUM_ONE,
- ec_heal_rmdir_cbk, heal, &heal->loc, 1, NULL);
+ int ret = 0;
+ default_args_cbk_t *replies = NULL;
+ unsigned char *output = NULL;
+ unsigned char *locked_on = NULL;
+ loc_t loc = {0};
+
+ loc.parent = inode_ref(parent);
+ loc.name = name;
+ loc.inode = inode_new(parent->table);
+ if (!loc.inode) {
+ ret = -ENOMEM;
+ goto out;
}
- else
+
+ EC_REPLIES_ALLOC(replies, ec->nodes);
+ output = alloca0(ec->nodes);
+ locked_on = alloca0(ec->nodes);
+ ret = cluster_inodelk(ec->xl_list, participants, ec->nodes, replies,
+ locked_on, frame, ec->xl, ec->xl->name, parent, 0, 0);
{
- ec_unlink(heal->fop->frame, heal->xl, cbk->mask, EC_MINIMUM_ONE,
- ec_heal_unlink_cbk, heal, &heal->loc, 0, NULL);
+ if (ret <= ec->fragments) {
+ gf_msg_debug(ec->xl->name, 0,
+ "%s/%s: Skipping "
+ "heal as only %d number of subvolumes could "
+ "be locked",
+ uuid_utoa(parent->gfid), name, ret);
+ ret = -ENOTCONN;
+ goto unlock;
+ }
+ EC_INTERSECT(participants, participants, locked_on, ec->nodes);
+ ret = __ec_heal_name(frame, ec, parent, name, participants);
}
+unlock:
+ cluster_uninodelk(ec->xl_list, locked_on, ec->nodes, replies, output, frame,
+ ec->xl, ec->xl->name, parent, 0, 0);
+out:
+ cluster_replies_wipe(replies, ec->nodes);
+ loc_wipe(&loc);
+ return ret;
}
-void ec_heal_remove_others(ec_heal_t * heal)
+int
+ec_name_heal_handler(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
+ void *data)
{
- struct list_head * item;
- ec_cbk_data_t * cbk;
-
- item = heal->lookup->cbk_list.next;
- do
- {
- item = item->next;
- cbk = list_entry(item, ec_cbk_data_t, list);
+ struct ec_name_data *name_data = data;
+ xlator_t *this = THIS;
+ ec_t *ec = this->private;
+ unsigned char *name_on = alloca0(ec->nodes);
+ int i = 0;
+ int ret = 0;
+
+ if (ec->shutdown) {
+ gf_msg_debug(this->name, 0,
+ "Cancelling directory heal "
+ "because EC is stopping.");
+ return -ENOTCONN;
+ }
+
+ memcpy(name_on, name_data->participants, ec->nodes);
+ ret = ec_heal_name(name_data->frame, ec, parent->inode, entry->d_name,
+ name_on);
+
+ if (ret < 0) {
+ memset(name_on, 0, ec->nodes);
+ } else {
+ name_data->heal_pending += ret;
+ }
- if (cbk->op_ret < 0)
- {
- if ((cbk->op_errno != ENOENT) && (cbk->op_errno != ENOTDIR) &&
- (cbk->op_errno != ESTALE))
- {
- gf_log(heal->xl->name, GF_LOG_WARNING, "Don't know how to "
- "remove inode with "
- "error %d",
- cbk->op_errno);
- }
+ for (i = 0; i < ec->nodes; i++)
+ if (name_data->participants[i] && !name_on[i])
+ name_data->failed_on[i] = 1;
- ec_heal_exclude(heal, cbk->mask);
+ return 0;
+}
+int
+ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode,
+ unsigned char *participants, uint32_t *pending)
+{
+ int i = 0;
+ int j = 0;
+ loc_t loc = {0};
+ struct ec_name_data name_data = {0};
+ int ret = 0;
+
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+ name_data.frame = frame;
+ name_data.participants = participants;
+ name_data.failed_on = alloca0(ec->nodes);
+ name_data.heal_pending = 0;
+
+ for (i = 0; i < ec->nodes; i++) {
+ if (!participants[i])
continue;
+ ret = syncop_dir_scan(ec->xl_list[i], &loc, GF_CLIENT_PID_SELF_HEALD,
+ &name_data, ec_name_heal_handler);
+ if (ret < 0) {
+ break;
}
+ for (j = 0; j < ec->nodes; j++)
+ if (name_data.failed_on[j])
+ participants[j] = 0;
- ec_heal_remove(heal, cbk);
- } while (item->next != &heal->lookup->cbk_list);
+ if (EC_COUNT(participants, ec->nodes) <= ec->fragments) {
+ ret = -ENOTCONN;
+ break;
+ }
+ }
+ *pending += name_data.heal_pending;
+
+ loc_wipe(&loc);
+ return ret;
}
-void ec_heal_prepare_others(ec_heal_t * heal)
+int
+__ec_heal_entry(call_frame_t *frame, ec_t *ec, inode_t *inode,
+ unsigned char *heal_on, unsigned char *sources,
+ unsigned char *healed_sinks, uint32_t *pending)
{
- struct list_head * item;
- ec_cbk_data_t * cbk;
-
- item = heal->lookup->cbk_list.next;
- while (item->next != &heal->lookup->cbk_list)
+ unsigned char *locked_on = NULL;
+ unsigned char *output = NULL;
+ uint64_t *versions = NULL;
+ uint64_t *dirty = NULL;
+ unsigned char *participants = NULL;
+ default_args_cbk_t *replies = NULL;
+ int ret = 0;
+ int source = 0;
+ int i = 0;
+
+ locked_on = alloca0(ec->nodes);
+ output = alloca0(ec->nodes);
+ versions = alloca0(ec->nodes * sizeof(*versions));
+ dirty = alloca0(ec->nodes * sizeof(*dirty));
+
+ EC_REPLIES_ALLOC(replies, ec->nodes);
+ ret = cluster_inodelk(ec->xl_list, heal_on, ec->nodes, replies, locked_on,
+ frame, ec->xl, ec->xl->name, inode, 0, 0);
{
- item = item->next;
- cbk = list_entry(item, ec_cbk_data_t, list);
+ if (ret <= ec->fragments) {
+ gf_msg_debug(ec->xl->name, 0,
+ "%s: Skipping heal "
+ "as only %d number of subvolumes could "
+ "be locked",
+ uuid_utoa(inode->gfid), ret);
+ ret = -ENOTCONN;
+ goto unlock;
+ }
+ ret = __ec_heal_entry_prepare(frame, ec, inode, locked_on, versions,
+ dirty, sources, healed_sinks);
+ source = ret;
+ }
+unlock:
+ cluster_uninodelk(ec->xl_list, locked_on, ec->nodes, replies, output, frame,
+ ec->xl, ec->xl->name, inode, 0, 0);
+ if (ret < 0)
+ goto out;
- if (cbk->op_ret < 0)
- {
- if ((cbk->op_errno == ENOENT) || (cbk->op_errno == ESTALE))
- {
- ec_heal_create(heal, cbk->mask);
- }
- else
- {
- gf_log(heal->xl->name, GF_LOG_ERROR, "Don't know how to "
- "heal error %d",
- cbk->op_errno);
+ participants = alloca0(ec->nodes);
+ for (i = 0; i < ec->nodes; i++) {
+ if (sources[i] || healed_sinks[i])
+ participants[i] = 1;
+ }
+ ret = ec_heal_names(frame, ec, inode, participants, pending);
- ec_heal_exclude(heal, cbk->mask);
- }
- }
- else
- {
- if ((heal->iatt.ia_type != cbk->iatt[0].ia_type) ||
- (gf_uuid_compare(heal->iatt.ia_gfid, cbk->iatt[0].ia_gfid) != 0))
- {
- ec_heal_remove(heal, cbk);
- }
+ if (EC_COUNT(participants, ec->nodes) <= ec->fragments)
+ goto out;
+
+ for (i = 0; i < ec->nodes; i++) {
+ if (!participants[i]) {
+ sources[i] = 0;
+ healed_sinks[i] = 0;
}
}
+
+ ec_adjust_versions(frame, ec, EC_DATA_TXN, inode, source, sources,
+ healed_sinks, versions, dirty);
+out:
+ cluster_replies_wipe(replies, ec->nodes);
+ return ret;
}
-int32_t ec_heal_readlink_cbk(call_frame_t * frame, void * cookie,
- xlator_t * this, int32_t op_ret, int32_t op_errno,
- const char * path, struct iatt * buf,
- dict_t * xdata)
+int
+ec_heal_entry(call_frame_t *frame, ec_t *ec, inode_t *inode,
+ unsigned char *sources, unsigned char *healed_sinks,
+ uint32_t *pending)
{
- ec_fop_data_t * fop = cookie;
- ec_heal_t * heal = fop->data;
-
- if (op_ret >= 0)
+ unsigned char *locked_on = NULL;
+ unsigned char *up_subvols = NULL;
+ unsigned char *output = NULL;
+ char selfheal_domain[1024] = {0};
+ int ret = 0;
+ default_args_cbk_t *replies = NULL;
+
+ EC_REPLIES_ALLOC(replies, ec->nodes);
+ locked_on = alloca0(ec->nodes);
+ output = alloca0(ec->nodes);
+ up_subvols = alloca0(ec->nodes);
+
+ sprintf(selfheal_domain, "%s:self-heal", ec->xl->name);
+ ec_mask_to_char_array(ec->xl_up, up_subvols, ec->nodes);
+ /*If other processes are already doing the heal, don't block*/
+ ret = cluster_tiebreaker_inodelk(ec->xl_list, up_subvols, ec->nodes,
+ replies, locked_on, frame, ec->xl,
+ selfheal_domain, inode, 0, 0);
{
- heal->symlink = gf_strdup(path);
- if (heal->symlink != NULL)
- {
- ec_heal_prepare_others(heal);
- }
- else
- {
- ec_fop_set_error(fop, EIO);
+ if (ret <= ec->fragments) {
+ gf_msg_debug(ec->xl->name, 0,
+ "%s: Skipping heal "
+ "as only %d number of subvolumes could "
+ "be locked",
+ uuid_utoa(inode->gfid), ret);
+ ret = -ENOTCONN;
+ goto unlock;
}
+ ret = __ec_heal_entry(frame, ec, inode, locked_on, sources,
+ healed_sinks, pending);
}
-
- return 0;
+unlock:
+ cluster_uninodelk(ec->xl_list, locked_on, ec->nodes, replies, output, frame,
+ ec->xl, selfheal_domain, inode, 0, 0);
+ cluster_replies_wipe(replies, ec->nodes);
+ return ret;
}
-ec_cbk_data_t * ec_heal_lookup_check(ec_heal_t * heal, uintptr_t * pgood,
- uintptr_t * pbad)
+/*Find direction for data heal and heal info*/
+int
+ec_heal_data_find_direction(ec_t *ec, default_args_cbk_t *replies,
+ uint64_t *data_versions, uint64_t *dirty,
+ uint64_t *size, unsigned char *sources,
+ unsigned char *healed_sinks,
+ gf_boolean_t check_ondisksize, int which)
{
- ec_fop_data_t * fop = heal->lookup;
- ec_cbk_data_t * cbk = NULL, * ans = NULL;
- uintptr_t good = 0, bad = 0;
+ uint64_t xattr[EC_VERSION_SIZE] = {0};
+ char version_size[128] = {0};
+ dict_t *version_size_db = NULL;
+ unsigned char *same = NULL;
+ int max_same_count = 0;
+ int source = 0;
+ int i = 0;
+ int ret = 0;
+ dict_t *dict = NULL;
+ uint64_t source_size = 0;
+
+ version_size_db = dict_new();
+ if (!version_size_db) {
+ ret = -ENOMEM;
+ goto out;
+ }
- list_for_each_entry(ans, &fop->cbk_list, list)
- {
- if ((ans->op_ret < 0) && (ans->op_errno == ENOTCONN))
- {
+ for (i = 0; i < ec->nodes; i++) {
+ if (!replies[i].valid)
+ continue;
+ if (replies[i].op_ret < 0)
continue;
+ dict = (which == EC_COMBINE_XDATA) ? replies[i].xdata
+ : replies[i].xattr;
+
+ ret = ec_dict_get_array(dict, EC_XATTR_VERSION, xattr, EC_VERSION_SIZE);
+ if (ret == 0) {
+ data_versions[i] = xattr[EC_DATA_TXN];
}
- if (ans == fop->answer)
- {
- good |= ans->mask;
- cbk = ans;
+ memset(xattr, 0, sizeof(xattr));
+ ret = ec_dict_get_array(dict, EC_XATTR_DIRTY, xattr, EC_VERSION_SIZE);
+ if (ret == 0) {
+ dirty[i] = xattr[EC_DATA_TXN];
}
- else
- {
- bad |= ans->mask;
+ ret = ec_dict_del_number(dict, EC_XATTR_SIZE, &size[i]);
+ /*Build a db of same metadata and data version and size*/
+ snprintf(version_size, sizeof(version_size), "%" PRIu64 "-%" PRIu64,
+ data_versions[i], size[i]);
+
+ ret = dict_get_bin(version_size_db, version_size, (void **)&same);
+ if (ret < 0) {
+ same = alloca0(ec->nodes);
}
- }
-
- *pgood = good;
- *pbad = bad;
-
- return cbk;
-}
-
-void ec_heal_prepare(ec_heal_t * heal)
-{
- ec_cbk_data_t * cbk;
- int32_t error = ENOMEM;
- heal->available = heal->good;
+ same[i] = 1;
+ if (max_same_count < EC_COUNT(same, ec->nodes)) {
+ max_same_count = EC_COUNT(same, ec->nodes);
+ source = i;
+ }
- cbk = heal->lookup->answer;
- if (cbk->op_ret < 0)
- {
- if ((cbk->op_errno == ENOENT) || (cbk->op_errno == ENOTDIR))
- {
- ec_heal_remove_others(heal);
+ if (ret < 0) {
+ ret = dict_set_static_bin(version_size_db, version_size, same,
+ ec->nodes);
}
- else
- {
- gf_log(heal->xl->name, GF_LOG_ERROR, "Don't know how to heal "
- "error %d",
- cbk->op_errno);
+
+ if (ret < 0) {
+ ret = -ENOMEM;
+ goto out;
}
}
- else
- {
- if (heal->iatt.ia_type == IA_IFREG)
- {
- heal->fd = fd_create(heal->loc.inode, heal->fop->frame->root->pid);
- if (heal->fd == NULL)
- {
- gf_log(heal->xl->name, GF_LOG_ERROR, "Unable to create a new "
- "file descriptor");
+ /* If we don't have ec->fragments number of same version,size it is not
+ * recoverable*/
+ if (max_same_count < ec->fragments) {
+ ret = -EIO;
+ goto out;
+ } else {
+ snprintf(version_size, sizeof(version_size), "%" PRIu64 "-%" PRIu64,
+ data_versions[source], size[source]);
- goto out;
- }
+ ret = dict_get_bin(version_size_db, version_size, (void **)&same);
+ if (ret < 0)
+ goto out;
+ memcpy(sources, same, ec->nodes);
+ for (i = 0; i < ec->nodes; i++) {
+ if (replies[i].valid && (replies[i].op_ret == 0) && !sources[i])
+ healed_sinks[i] = 1;
}
+ }
- if (heal->iatt.ia_type == IA_IFLNK)
- {
- ec_readlink(heal->fop->frame, heal->xl, cbk->mask, EC_MINIMUM_ONE,
- ec_heal_readlink_cbk, heal, &heal->loc,
- heal->iatt.ia_size, NULL);
+ /* There could be files with versions, size same but on disk ia_size
+ * could be different because of disk crashes, mark them as sinks as
+ * well*/
+
+ if (check_ondisksize) {
+ source_size = size[source];
+ ec_adjust_size_up(ec, &source_size, _gf_true);
+
+ for (i = 0; i < ec->nodes; i++) {
+ if (sources[i]) {
+ if (replies[i].stat.ia_size != source_size) {
+ sources[i] = 0;
+ healed_sinks[i] = 1;
+ max_same_count--;
+ } else {
+ source = i;
+ }
+ }
}
- else
- {
- ec_heal_prepare_others(heal);
+ if (max_same_count < ec->fragments) {
+ ret = -EIO;
+ goto out;
}
}
- error = 0;
-
+ ret = source;
out:
- ec_fop_set_error(heal->fop, error);
+ if (version_size_db)
+ dict_unref(version_size_db);
+ return ret;
}
-int32_t ec_heal_open_others(ec_heal_t * heal)
+int
+__ec_heal_data_prepare(call_frame_t *frame, ec_t *ec, fd_t *fd,
+ unsigned char *locked_on, uint64_t *versions,
+ uint64_t *dirty, uint64_t *size, unsigned char *sources,
+ unsigned char *healed_sinks, unsigned char *trim,
+ struct iatt *stbuf)
{
- struct list_head * item;
- ec_cbk_data_t * cbk;
- uintptr_t mask = 0, open = heal->open;
+ default_args_cbk_t *replies = NULL;
+ default_args_cbk_t *fstat_replies = NULL;
+ unsigned char *output = NULL;
+ unsigned char *fstat_output = NULL;
+ dict_t *xattrs = NULL;
+ uint64_t zero_array[2] = {0};
+ int source = 0;
+ int ret = 0;
+ uint64_t zero_value = 0;
+ int i = 0;
+
+ EC_REPLIES_ALLOC(replies, ec->nodes);
+ EC_REPLIES_ALLOC(fstat_replies, ec->nodes);
+ output = alloca0(ec->nodes);
+ fstat_output = alloca0(ec->nodes);
+ xattrs = dict_new();
+ if (!xattrs ||
+ dict_set_static_bin(xattrs, EC_XATTR_VERSION, zero_array,
+ sizeof(zero_array)) ||
+ dict_set_static_bin(xattrs, EC_XATTR_DIRTY, zero_array,
+ sizeof(zero_array)) ||
+ dict_set_static_bin(xattrs, EC_XATTR_SIZE, &zero_value,
+ sizeof(zero_value))) {
+ ret = -ENOMEM;
+ goto out;
+ }
- item = heal->lookup->cbk_list.next;
- while (item->next != &heal->lookup->cbk_list)
- {
- item = item->next;
- cbk = list_entry(item, ec_cbk_data_t, list);
+ ret = cluster_fxattrop(ec->xl_list, locked_on, ec->nodes, replies, output,
+ frame, ec->xl, fd, GF_XATTROP_ADD_ARRAY64, xattrs,
+ NULL);
- if ((cbk->op_ret < 0) || (cbk->iatt[0].ia_type != IA_IFREG) ||
- (gf_uuid_compare(heal->iatt.ia_gfid, cbk->iatt[0].ia_gfid) != 0))
- {
- ec_heal_exclude(heal, cbk->mask);
- }
- else
- {
- mask |= cbk->mask & ~heal->open;
- }
- }
+ ret = cluster_fstat(ec->xl_list, locked_on, ec->nodes, fstat_replies,
+ fstat_output, frame, ec->xl, fd, NULL);
- if (mask != 0)
- {
- ec_open(heal->fop->frame, heal->xl, mask, EC_MINIMUM_ONE,
- ec_heal_target_open_cbk, heal, &heal->loc, O_RDWR | O_TRUNC,
- heal->fd, NULL);
+ for (i = 0; i < ec->nodes; i++) {
+ output[i] = output[i] && fstat_output[i];
+ replies[i].valid = output[i];
+ if (output[i])
+ replies[i].stat = fstat_replies[i].stat;
+ }
- open |= mask;
+ if (EC_COUNT(output, ec->nodes) <= ec->fragments) {
+ ret = -ENOTCONN;
+ goto out;
}
- return (open != 0);
-}
+ source = ec_heal_data_find_direction(ec, replies, versions, dirty, size,
+ sources, healed_sinks, _gf_true,
+ EC_COMBINE_DICT);
+ ret = source;
+ if (ret < 0)
+ goto out;
-uintptr_t ec_heal_needs_data_rebuild(ec_heal_t *heal)
-{
- ec_fop_data_t *fop = heal->lookup;
- ec_cbk_data_t *cbk = NULL;
- uintptr_t bad = 0;
+ if (stbuf)
+ *stbuf = replies[source].stat;
- if ((heal->fop->error != 0) || (heal->good == 0) ||
- (heal->iatt.ia_type != IA_IFREG)) {
- return 0;
+ for (i = 0; i < ec->nodes; i++) {
+ if (healed_sinks[i]) {
+ if (replies[i].stat.ia_size)
+ trim[i] = 1;
+ }
}
- list_for_each_entry(cbk, &fop->cbk_list, list) {
- if ((cbk->op_ret >= 0) &&
- ((cbk->size != heal->raw_size) ||
- (cbk->version != heal->version))) {
- bad |= cbk->mask;
- }
+ if (EC_COUNT(sources, ec->nodes) < ec->fragments) {
+ ret = -ENOTCONN;
+ goto out;
}
- return bad;
+ ret = source;
+out:
+ if (xattrs)
+ dict_unref(xattrs);
+ cluster_replies_wipe(replies, ec->nodes);
+ cluster_replies_wipe(fstat_replies, ec->nodes);
+ if (ret < 0) {
+ gf_msg_debug(ec->xl->name, 0, "%s: heal failed %s",
+ uuid_utoa(fd->inode->gfid), strerror(-ret));
+ } else {
+ gf_msg_debug(ec->xl->name, 0,
+ "%s: sources: %d, sinks: "
+ "%d",
+ uuid_utoa(fd->inode->gfid), EC_COUNT(sources, ec->nodes),
+ EC_COUNT(healed_sinks, ec->nodes));
+ }
+ return ret;
}
-void ec_heal_setxattr_others(ec_heal_t * heal)
+int
+__ec_heal_mark_sinks(call_frame_t *frame, ec_t *ec, fd_t *fd,
+ uint64_t *versions, unsigned char *healed_sinks)
{
- ec_cbk_data_t * cbk;
- dict_t * xdata;
- int32_t error = ENOMEM;
+ int i = 0;
+ int ret = 0;
+ unsigned char *mark = NULL;
+ dict_t *xattrs = NULL;
+ default_args_cbk_t *replies = NULL;
+ unsigned char *output = NULL;
+ uint64_t versions_xattr[2] = {0};
+
+ EC_REPLIES_ALLOC(replies, ec->nodes);
+ xattrs = dict_new();
+ if (!xattrs) {
+ ret = -ENOMEM;
+ goto out;
+ }
- if ((heal->good != 0) && (heal->bad != 0))
- {
- cbk = heal->lookup->answer;
- xdata = cbk->xdata;
+ mark = alloca0(ec->nodes);
+ for (i = 0; i < ec->nodes; i++) {
+ if (!healed_sinks[i])
+ continue;
+ if ((versions[i] >> EC_SELFHEAL_BIT) & 1)
+ continue;
+ mark[i] = 1;
+ }
- if (dict_foreach_match (xdata, ec_ignorable_key_match, NULL,
- dict_remove_foreach_fn, NULL) == -1)
- goto out;
+ if (EC_COUNT(mark, ec->nodes) == 0)
+ return 0;
- if ((cbk->iatt[0].ia_type == IA_IFREG) ||
- (cbk->iatt[0].ia_type == IA_IFDIR))
- {
- if (ec_dict_set_number(xdata, EC_XATTR_VERSION, cbk->version) != 0)
- {
- goto out;
- }
- if (cbk->iatt[0].ia_type == IA_IFREG)
- {
- uint64_t dirty;
-
- dirty = ec_heal_needs_data_rebuild(heal) != 0;
- if ((ec_dict_set_number(xdata, EC_XATTR_SIZE,
- cbk->iatt[0].ia_size) != 0) ||
- (ec_dict_set_number(xdata, EC_XATTR_DIRTY, dirty) != 0)) {
- goto out;
- }
- }
- }
+ versions_xattr[EC_DATA_TXN] = hton64(1ULL << EC_SELFHEAL_BIT);
+ if (dict_set_static_bin(xattrs, EC_XATTR_VERSION, versions_xattr,
+ sizeof(versions_xattr))) {
+ ret = -ENOMEM;
+ goto out;
+ }
- ec_setxattr(heal->fop->frame, heal->xl, heal->bad, EC_MINIMUM_ONE,
- ec_heal_setxattr_cbk, heal, &heal->loc, xdata, 0, NULL);
+ output = alloca0(ec->nodes);
+ ret = cluster_fxattrop(ec->xl_list, mark, ec->nodes, replies, output, frame,
+ ec->xl, fd, GF_XATTROP_ADD_ARRAY64, xattrs, NULL);
+ for (i = 0; i < ec->nodes; i++) {
+ if (!output[i]) {
+ if (mark[i])
+ healed_sinks[i] = 0;
+ continue;
+ }
+ versions[i] |= (1ULL << EC_SELFHEAL_BIT);
}
- error = 0;
+ if (EC_COUNT(healed_sinks, ec->nodes) == 0) {
+ ret = -ENOTCONN;
+ goto out;
+ }
+ ret = 0;
out:
- ec_fop_set_error(heal->fop, error);
+ cluster_replies_wipe(replies, ec->nodes);
+ if (xattrs)
+ dict_unref(xattrs);
+ if (ret < 0)
+ gf_msg_debug(ec->xl->name, 0, "%s: heal failed %s",
+ uuid_utoa(fd->inode->gfid), strerror(-ret));
+ return ret;
}
int32_t
-ec_heal_xattr_clean (dict_t *dict, char *key, data_t *data,
- void *arg)
+ec_manager_heal_block(ec_fop_data_t *fop, int32_t state)
{
- dict_t *base = arg;
+ ec_heal_t *heal = fop->data;
+ heal->fop = fop;
- if (ec_ignorable_key_match (NULL, key, NULL, NULL)) {
- dict_del (dict, key);
- return 0;
- }
+ switch (state) {
+ case EC_STATE_INIT:
+ ec_owner_set(fop->frame, fop->frame->root);
- if (dict_get (base, key) != NULL)
- dict_del (dict, key);
+ ec_heal_inodelk(heal, F_WRLCK, 1, 0, 0);
- return 0;
-}
+ return EC_STATE_HEAL_DATA_COPY;
-void ec_heal_removexattr_others(ec_heal_t * heal)
-{
- struct list_head * item;
- ec_cbk_data_t * cbk;
- dict_t * xdata;
+ case EC_STATE_HEAL_DATA_COPY:
+ gf_msg_debug(fop->xl->name, 0, "%s: read/write starting",
+ uuid_utoa(heal->fd->inode->gfid));
+ ec_heal_data_block(heal);
- if ((heal->good == 0) || (heal->bad == 0))
- {
- return;
- }
+ return EC_STATE_HEAL_DATA_UNLOCK;
- xdata = heal->lookup->answer->xdata;
- item = heal->lookup->cbk_list.next;
- while (item->next != &heal->lookup->cbk_list)
- {
- item = item->next;
- cbk = list_entry(item, ec_cbk_data_t, list);
+ case -EC_STATE_HEAL_DATA_COPY:
+ case -EC_STATE_HEAL_DATA_UNLOCK:
+ case EC_STATE_HEAL_DATA_UNLOCK:
+ ec_heal_inodelk(heal, F_UNLCK, 1, 0, 0);
- if (cbk->op_ret >= 0)
- {
- if (dict_foreach(cbk->xdata, ec_heal_xattr_clean, xdata) == 0)
- {
- ec_removexattr(heal->fop->frame, heal->xl, cbk->mask,
- EC_MINIMUM_ONE, ec_heal_removexattr_cbk, heal,
- &heal->loc, "", cbk->xdata);
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ if (fop->cbks.heal) {
+ fop->cbks.heal(fop->req_frame, fop->data, fop->xl, 0, 0,
+ (heal->good | heal->bad), heal->good, heal->bad,
+ 0, NULL);
}
- }
- }
-}
-void ec_heal_attr(ec_heal_t * heal)
-{
- if ((heal->good != 0) && (heal->bad != 0))
- {
- ec_setattr(heal->fop->frame, heal->xl, heal->bad, EC_MINIMUM_ONE,
- ec_heal_setattr_cbk, heal, &heal->loc, &heal->iatt,
- GF_SET_ATTR_MODE | GF_SET_ATTR_UID | GF_SET_ATTR_GID |
- GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME, NULL);
+ return EC_STATE_END;
+ case -EC_STATE_REPORT:
+ if (fop->cbks.heal) {
+ fop->cbks.heal(fop->req_frame, fop->data, fop->xl, -1,
+ fop->error, 0, 0, 0, 0, NULL);
+ }
+
+ return EC_STATE_END;
+ default:
+ gf_msg(fop->xl->name, GF_LOG_ERROR, 0, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
}
}
+/*Takes lock */
void
-ec_heal_open (ec_heal_t * heal)
+ec_heal_block(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_heal_cbk_t func, ec_heal_t *heal)
{
- heal->bad = ec_heal_needs_data_rebuild(heal);
- if (heal->bad == 0) {
- return;
- }
+ ec_cbk_t callback = {.heal = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
- if (!heal->fd) {
- /* name-less loc heal */
- heal->fd = fd_create (heal->loc.inode,
- heal->fop->frame->root->pid);
- }
+ gf_msg_trace("ec", 0, "EC(HEAL) %p", frame);
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, EC_FOP_HEAL, 0, target, fop_flags,
+ NULL, ec_manager_heal_block, callback, heal);
+ if (fop == NULL)
+ goto out;
- if (!heal->fd) {
- ec_fop_set_error(heal->fop, ENOMEM);
- return;
+ error = 0;
+
+out:
+ if (fop != NULL) {
+ ec_manager(fop, error);
+ } else {
+ func(frame, heal, this, -1, error, 0, 0, 0, 0, NULL);
}
+}
- if (ec_heal_open_others(heal))
- {
- ec_open(heal->fop->frame, heal->xl, heal->good, EC_MINIMUM_MIN,
- ec_heal_source_open_cbk, heal, &heal->loc, O_RDONLY, heal->fd,
- NULL);
+int32_t
+ec_heal_block_done(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, uintptr_t mask,
+ uintptr_t good, uintptr_t bad, uint32_t pending,
+ dict_t *xdata)
+{
+ ec_heal_t *heal = cookie;
+
+ if (heal->fop) {
+ heal->fop->heal = NULL;
}
+ heal->fop = NULL;
+ heal->error = op_ret < 0 ? op_errno : 0;
+ syncbarrier_wake(heal->data);
+ return 0;
}
-void ec_heal_reopen_fd(ec_heal_t * heal)
+int
+ec_sync_heal_block(call_frame_t *frame, xlator_t *this, ec_heal_t *heal)
{
- inode_t * inode;
- fd_t * fd;
- ec_fd_t *ctx_fd;
- ec_inode_t *ctx_inode;
- uintptr_t mask;
- int32_t flags;
+ ec_heal_block(frame, this, heal->bad | heal->good, EC_MINIMUM_ONE,
+ ec_heal_block_done, heal);
+ syncbarrier_wait(heal->data, 1);
+ if (heal->error != 0) {
+ return -heal->error;
+ }
+ if (heal->bad == 0)
+ return -ENOTCONN;
+ return 0;
+}
- inode = heal->loc.inode;
+int
+ec_rebuild_data(call_frame_t *frame, ec_t *ec, fd_t *fd, uint64_t size,
+ unsigned char *sources, unsigned char *healed_sinks)
+{
+ ec_heal_t *heal = NULL;
+ int ret = 0;
+ syncbarrier_t barrier;
+
+ if (syncbarrier_init(&barrier))
+ return -ENOMEM;
+
+ heal = alloca0(sizeof(*heal));
+ heal->fd = fd_ref(fd);
+ heal->xl = ec->xl;
+ heal->data = &barrier;
+ ec_adjust_size_up(ec, &size, _gf_false);
+ heal->total_size = size;
+ heal->size = (128 * GF_UNIT_KB * (ec->self_heal_window_size));
+ /* We need to adjust the size to a multiple of the stripe size of the
+ * volume. Otherwise writes would need to fill gaps (head and/or tail)
+ * with existent data from the bad bricks. This could be garbage on a
+ * damaged file or it could fail if there aren't enough bricks. */
+ heal->size -= heal->size % ec->stripe_size;
+ heal->bad = ec_char_array_to_mask(healed_sinks, ec->nodes);
+ heal->good = ec_char_array_to_mask(sources, ec->nodes);
+ heal->iatt.ia_type = IA_IFREG;
+ LOCK_INIT(&heal->lock);
- LOCK(&inode->lock);
+ for (heal->offset = 0; (heal->offset < size) && !heal->done;
+ heal->offset += heal->size) {
+ /* We immediately abort any heal if a shutdown request has been
+ * received to avoid delays. The healing of this file will be
+ * restarted by another SHD or other client that accesses the
+ * file. */
+ if (ec->shutdown) {
+ gf_msg_debug(ec->xl->name, 0,
+ "Cancelling heal because "
+ "EC is stopping.");
+ ret = -ENOTCONN;
+ break;
+ }
- ctx_inode = __ec_inode_get(inode, heal->xl);
- if (ctx_inode != NULL) {
- ctx_inode->bad &= ~(heal->good | heal->bad);
+ gf_msg_debug(ec->xl->name, 0,
+ "%s: sources: %d, sinks: "
+ "%d, offset: %" PRIu64 " bsize: %" PRIu64,
+ uuid_utoa(fd->inode->gfid), EC_COUNT(sources, ec->nodes),
+ EC_COUNT(healed_sinks, ec->nodes), heal->offset,
+ heal->size);
+ ret = ec_sync_heal_block(frame, ec->xl, heal);
+ if (ret < 0)
+ break;
}
+ memset(healed_sinks, 0, ec->nodes);
+ ec_mask_to_char_array(heal->bad, healed_sinks, ec->nodes);
+ fd_unref(heal->fd);
+ LOCK_DESTROY(&heal->lock);
+ syncbarrier_destroy(heal->data);
+ if (ret < 0)
+ gf_msg_debug(ec->xl->name, 0, "%s: heal failed %s",
+ uuid_utoa(fd->inode->gfid), strerror(-ret));
+ return ret;
+}
- list_for_each_entry(fd, &inode->fd_list, inode_list)
- {
- ctx_fd = ec_fd_get(fd, heal->xl);
- if (ctx_fd != NULL) {
- mask = heal->bad & ~ctx_fd->open;
- if (mask != 0)
- {
- UNLOCK(&inode->lock);
-
- if (heal->iatt.ia_type == IA_IFDIR)
- {
- ec_opendir(heal->fop->frame, heal->xl, mask,
- EC_MINIMUM_ONE, ec_heal_reopen_cbk, NULL,
- &heal->loc, fd, NULL);
- }
- else
- {
- flags = ctx_fd->flags & ~(O_TRUNC | O_APPEND);
+int
+__ec_heal_trim_sinks(call_frame_t *frame, ec_t *ec, fd_t *fd,
+ unsigned char *healed_sinks, unsigned char *trim,
+ uint64_t size)
+{
+ default_args_cbk_t *replies = NULL;
+ unsigned char *output = NULL;
+ int ret = 0;
+ int i = 0;
+ off_t trim_offset = 0;
- ec_open(heal->fop->frame, heal->xl, mask, EC_MINIMUM_ONE,
- ec_heal_reopen_cbk, NULL, &heal->loc, flags, fd,
- NULL);
- }
+ EC_REPLIES_ALLOC(replies, ec->nodes);
+ output = alloca0(ec->nodes);
- LOCK(&inode->lock);
- }
- }
+ if (EC_COUNT(trim, ec->nodes) == 0) {
+ ret = 0;
+ goto out;
+ }
+ trim_offset = size;
+ ec_adjust_offset_up(ec, &trim_offset, _gf_true);
+ ret = cluster_ftruncate(ec->xl_list, trim, ec->nodes, replies, output,
+ frame, ec->xl, fd, trim_offset, NULL);
+ for (i = 0; i < ec->nodes; i++) {
+ if (!output[i] && trim[i])
+ healed_sinks[i] = 0;
}
- UNLOCK(&inode->lock);
+ if (EC_COUNT(healed_sinks, ec->nodes) == 0) {
+ ret = -ENOTCONN;
+ goto out;
+ }
+
+out:
+ cluster_replies_wipe(replies, ec->nodes);
+ if (ret < 0)
+ gf_msg_debug(ec->xl->name, 0, "%s: heal failed %s",
+ uuid_utoa(fd->inode->gfid), strerror(-ret));
+ return ret;
}
-int32_t ec_heal_writev_cbk(call_frame_t * frame, void * cookie,
- xlator_t * this, int32_t op_ret, int32_t op_errno,
- struct iatt * prebuf, struct iatt * postbuf,
- dict_t * xdata)
+int
+ec_data_undo_pending(call_frame_t *frame, ec_t *ec, fd_t *fd, dict_t *xattr,
+ uint64_t *versions, uint64_t *dirty, uint64_t *size,
+ int source, gf_boolean_t erase_dirty, int idx)
{
- ec_trace("WRITE_CBK", cookie, "ret=%d, errno=%d", op_ret, op_errno);
+ uint64_t versions_xattr[2] = {0};
+ uint64_t dirty_xattr[2] = {0};
+ uint64_t allzero[2] = {0};
+ uint64_t size_xattr = 0;
+ int ret = 0;
+
+ versions_xattr[EC_DATA_TXN] = hton64(versions[source] - versions[idx]);
+ ret = dict_set_static_bin(xattr, EC_XATTR_VERSION, versions_xattr,
+ sizeof(versions_xattr));
+ if (ret < 0)
+ goto out;
- ec_heal_update(cookie, 0);
+ size_xattr = hton64(size[source] - size[idx]);
+ ret = dict_set_static_bin(xattr, EC_XATTR_SIZE, &size_xattr,
+ sizeof(size_xattr));
+ if (ret < 0)
+ goto out;
- return 0;
+ if (erase_dirty) {
+ dirty_xattr[EC_DATA_TXN] = hton64(-dirty[idx]);
+ ret = dict_set_static_bin(xattr, EC_XATTR_DIRTY, dirty_xattr,
+ sizeof(dirty_xattr));
+ if (ret < 0)
+ goto out;
+ }
+
+ if ((memcmp(versions_xattr, allzero, sizeof(allzero)) == 0) &&
+ (memcmp(dirty_xattr, allzero, sizeof(allzero)) == 0) &&
+ (size_xattr == 0)) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = syncop_fxattrop(ec->xl_list[idx], fd, GF_XATTROP_ADD_ARRAY64, xattr,
+ NULL, NULL, NULL);
+out:
+ return ret;
}
-int32_t ec_heal_readv_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
- int32_t op_ret, int32_t op_errno,
- struct iovec * vector, int32_t count,
- struct iatt * stbuf, struct iobref * iobref,
- dict_t * xdata)
+int
+__ec_fd_data_adjust_versions(call_frame_t *frame, ec_t *ec, fd_t *fd,
+ unsigned char *sources,
+ unsigned char *healed_sinks, uint64_t *versions,
+ uint64_t *dirty, uint64_t *size)
{
- ec_fop_data_t * fop = cookie;
- ec_heal_t * heal = fop->data;
+ dict_t *xattr = NULL;
+ int i = 0;
+ int ret = 0;
+ int op_ret = 0;
+ int source = -1;
+ gf_boolean_t erase_dirty = _gf_false;
+
+ xattr = dict_new();
+ if (!xattr) {
+ op_ret = -ENOMEM;
+ goto out;
+ }
- ec_trace("READ_CBK", fop, "ret=%d, errno=%d", op_ret, op_errno);
+ /* dirty xattr represents if the file needs heal. Unless all the
+ * copies are healed, don't erase it */
+ if (EC_COUNT(sources, ec->nodes) + EC_COUNT(healed_sinks, ec->nodes) ==
+ ec->nodes)
+ erase_dirty = _gf_true;
- ec_heal_avoid(fop);
+ for (i = 0; i < ec->nodes; i++) {
+ if (sources[i]) {
+ source = i;
+ break;
+ }
+ }
- if (op_ret > 0)
- {
- ec_writev(heal->fop->frame, heal->xl, heal->bad, EC_MINIMUM_ONE,
- ec_heal_writev_cbk, heal, heal->fd, vector, count,
- heal->offset, 0, iobref, NULL);
+ if (source == -1) {
+ op_ret = -ENOTCONN;
+ goto out;
}
- else
- {
- heal->done = 1;
+
+ for (i = 0; i < ec->nodes; i++) {
+ if (healed_sinks[i]) {
+ ret = ec_data_undo_pending(frame, ec, fd, xattr, versions, dirty,
+ size, source, erase_dirty, i);
+ if (ret < 0)
+ goto out;
+ }
}
- return 0;
+ if (!erase_dirty)
+ goto out;
+
+ for (i = 0; i < ec->nodes; i++) {
+ if (sources[i]) {
+ ret = ec_data_undo_pending(frame, ec, fd, xattr, versions, dirty,
+ size, source, erase_dirty, i);
+ if (ret < 0)
+ continue;
+ }
+ }
+out:
+ if (xattr)
+ dict_unref(xattr);
+ return op_ret;
}
-void ec_heal_data(ec_heal_t * heal)
+int
+ec_restore_time_and_adjust_versions(call_frame_t *frame, ec_t *ec, fd_t *fd,
+ unsigned char *sources,
+ unsigned char *healed_sinks,
+ uint64_t *versions, uint64_t *dirty,
+ uint64_t *size)
{
- ec_trace("DATA", heal->fop, "good=%lX, bad=%lX", heal->good, heal->bad);
-
- if ((heal->good != 0) && (heal->bad != 0) &&
- (heal->iatt.ia_type == IA_IFREG))
+ unsigned char *locked_on = NULL;
+ unsigned char *participants = NULL;
+ unsigned char *output = NULL;
+ default_args_cbk_t *replies = NULL;
+ unsigned char *postsh_sources = NULL;
+ unsigned char *postsh_healed_sinks = NULL;
+ unsigned char *postsh_trim = NULL;
+ uint64_t *postsh_versions = NULL;
+ uint64_t *postsh_dirty = NULL;
+ uint64_t *postsh_size = NULL;
+ int ret = 0;
+ int i = 0;
+ struct iatt source_buf = {0};
+ loc_t loc = {0};
+
+ locked_on = alloca0(ec->nodes);
+ output = alloca0(ec->nodes);
+ participants = alloca0(ec->nodes);
+ postsh_sources = alloca0(ec->nodes);
+ postsh_healed_sinks = alloca0(ec->nodes);
+ postsh_trim = alloca0(ec->nodes);
+ postsh_versions = alloca0(ec->nodes * sizeof(*postsh_versions));
+ postsh_dirty = alloca0(ec->nodes * sizeof(*postsh_dirty));
+ postsh_size = alloca0(ec->nodes * sizeof(*postsh_size));
+
+ for (i = 0; i < ec->nodes; i++) {
+ if (healed_sinks[i] || sources[i])
+ participants[i] = 1;
+ }
+
+ EC_REPLIES_ALLOC(replies, ec->nodes);
+ ret = cluster_inodelk(ec->xl_list, participants, ec->nodes, replies,
+ locked_on, frame, ec->xl, ec->xl->name, fd->inode, 0,
+ 0);
{
- ec_readv(heal->fop->frame, heal->xl, heal->good, EC_MINIMUM_MIN,
- ec_heal_readv_cbk, heal, heal->fd, heal->size, heal->offset,
- 0, NULL);
+ if (ret <= ec->fragments) {
+ gf_msg_debug(ec->xl->name, 0,
+ "%s: Skipping heal "
+ "as only %d number of subvolumes could "
+ "be locked",
+ uuid_utoa(fd->inode->gfid), ret);
+ ret = -ENOTCONN;
+ goto unlock;
+ }
+
+ ret = __ec_heal_data_prepare(frame, ec, fd, locked_on, postsh_versions,
+ postsh_dirty, postsh_size, postsh_sources,
+ postsh_healed_sinks, postsh_trim,
+ &source_buf);
+ if (ret < 0)
+ goto unlock;
+
+ loc.inode = inode_ref(fd->inode);
+ gf_uuid_copy(loc.gfid, fd->inode->gfid);
+ ret = cluster_setattr(
+ ec->xl_list, healed_sinks, ec->nodes, replies, output, frame,
+ ec->xl, &loc, &source_buf,
+ GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME | GF_SET_ATTR_CTIME, NULL);
+ EC_INTERSECT(healed_sinks, healed_sinks, output, ec->nodes);
+ if (EC_COUNT(healed_sinks, ec->nodes) == 0) {
+ ret = -ENOTCONN;
+ goto unlock;
+ }
+ ret = __ec_fd_data_adjust_versions(frame, ec, fd, sources, healed_sinks,
+ versions, dirty, size);
}
+unlock:
+ cluster_uninodelk(ec->xl_list, locked_on, ec->nodes, replies, output, frame,
+ ec->xl, ec->xl->name, fd->inode, 0, 0);
+ cluster_replies_wipe(replies, ec->nodes);
+ loc_wipe(&loc);
+ return ret;
}
-void ec_heal_update_dirty(ec_heal_t *heal, uintptr_t mask)
+int
+__ec_heal_data(call_frame_t *frame, ec_t *ec, fd_t *fd, unsigned char *heal_on,
+ unsigned char *sources, unsigned char *healed_sinks)
{
- dict_t *dict;
+ unsigned char *locked_on = NULL;
+ unsigned char *output = NULL;
+ uint64_t *versions = NULL;
+ uint64_t *dirty = NULL;
+ uint64_t *size = NULL;
+ unsigned char *trim = NULL;
+ default_args_cbk_t *replies = NULL;
+ int ret = 0;
+ int source = 0;
+
+ locked_on = alloca0(ec->nodes);
+ output = alloca0(ec->nodes);
+ trim = alloca0(ec->nodes);
+ versions = alloca0(ec->nodes * sizeof(*versions));
+ dirty = alloca0(ec->nodes * sizeof(*dirty));
+ size = alloca0(ec->nodes * sizeof(*size));
+
+ EC_REPLIES_ALLOC(replies, ec->nodes);
+ ret = cluster_inodelk(ec->xl_list, heal_on, ec->nodes, replies, locked_on,
+ frame, ec->xl, ec->xl->name, fd->inode, 0, 0);
+ {
+ if (ret <= ec->fragments) {
+ gf_msg_debug(ec->xl->name, 0,
+ "%s: Skipping heal "
+ "as only %d number of subvolumes could "
+ "be locked",
+ uuid_utoa(fd->inode->gfid), ret);
+ ret = -ENOTCONN;
+ goto unlock;
+ }
- dict = dict_new();
- if (dict == NULL) {
- ec_fop_set_error(heal->fop, EIO);
+ ret = __ec_heal_data_prepare(frame, ec, fd, locked_on, versions, dirty,
+ size, sources, healed_sinks, trim, NULL);
+ if (ret < 0)
+ goto unlock;
- return;
- }
+ if (EC_COUNT(healed_sinks, ec->nodes) == 0) {
+ ret = __ec_fd_data_adjust_versions(
+ frame, ec, fd, sources, healed_sinks, versions, dirty, size);
+ goto unlock;
+ }
- if (ec_dict_set_number(dict, EC_XATTR_DIRTY, -1) != 0) {
- dict_unref(dict);
- ec_fop_set_error(heal->fop, EIO);
+ source = ret;
+ ret = __ec_heal_mark_sinks(frame, ec, fd, versions, healed_sinks);
+ if (ret < 0)
+ goto unlock;
- return;
+ ret = __ec_heal_trim_sinks(frame, ec, fd, healed_sinks, trim,
+ size[source]);
}
+unlock:
+ cluster_uninodelk(ec->xl_list, locked_on, ec->nodes, replies, output, frame,
+ ec->xl, ec->xl->name, fd->inode, 0, 0);
+ if (ret < 0)
+ goto out;
- ec_fxattrop(heal->fop->frame, heal->xl, mask, EC_MINIMUM_ONE, NULL, NULL,
- heal->fd, GF_XATTROP_ADD_ARRAY64, dict, NULL);
+ if (EC_COUNT(healed_sinks, ec->nodes) == 0)
+ goto out;
+
+ gf_msg_debug(ec->xl->name, 0,
+ "%s: sources: %d, sinks: "
+ "%d",
+ uuid_utoa(fd->inode->gfid), EC_COUNT(sources, ec->nodes),
+ EC_COUNT(healed_sinks, ec->nodes));
+
+ ret = ec_rebuild_data(frame, ec, fd, size[source], sources, healed_sinks);
+ if (ret < 0)
+ goto out;
- dict_unref(dict);
+ ret = ec_restore_time_and_adjust_versions(
+ frame, ec, fd, sources, healed_sinks, versions, dirty, size);
+out:
+ cluster_replies_wipe(replies, ec->nodes);
+ return ret;
}
-void ec_heal_dispatch(ec_heal_t *heal)
+int
+ec_heal_data(call_frame_t *frame, ec_t *ec, gf_boolean_t block, inode_t *inode,
+ unsigned char *sources, unsigned char *healed_sinks)
{
- ec_fop_data_t *fop;
- ec_cbk_data_t *cbk;
- inode_t *inode;
- ec_inode_t *ctx;
- ec_heal_t *next = NULL;
- struct list_head list;
- int32_t error;
-
- inode = heal->loc.inode;
-
- INIT_LIST_HEAD(&list);
-
- LOCK(&inode->lock);
-
- /* A heal object not belonging to any list means that it has not been fully
- * executed. It got its information from a previous heal that was executing
- * when this heal started. */
- if (!list_empty(&heal->list)) {
- list_del_init(&heal->list);
- ctx = __ec_inode_get(inode, heal->xl);
- if (ctx != NULL) {
- ctx->bad &= ~heal->good;
-
- if (heal->partial) {
- /* Collect all partial heal requests. All of them will receive
- * the same answer. 'next' will contain a pointer to the first
- * full request (if any) after this partial heal request.*/
- while (!list_empty(&ctx->heal)) {
- next = list_entry(ctx->heal.next, ec_heal_t, list);
- if (!next->partial) {
- break;
- }
- list_move_tail(&next->list, &list);
- }
- if (list_empty(&ctx->heal)) {
- next = NULL;
- }
- } else {
- /* This is a full heal request, so take all received heal
- * requests to answer them now. */
- list_splice_init(&ctx->heal, &list);
- }
- }
+ unsigned char *locked_on = NULL;
+ unsigned char *up_subvols = NULL;
+ unsigned char *output = NULL;
+ default_args_cbk_t *replies = NULL;
+ fd_t *fd = NULL;
+ loc_t loc = {0};
+ char selfheal_domain[1024] = {0};
+ int ret = 0;
+
+ EC_REPLIES_ALLOC(replies, ec->nodes);
+
+ locked_on = alloca0(ec->nodes);
+ output = alloca0(ec->nodes);
+ up_subvols = alloca0(ec->nodes);
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+
+ fd = fd_create(inode, 0);
+ if (!fd) {
+ ret = -ENOMEM;
+ goto out;
}
- UNLOCK(&inode->lock);
-
- fop = heal->fop;
- error = fop->error;
-
- cbk = ec_cbk_data_allocate(fop->frame, heal->xl, fop, fop->id, 0,
- error == 0 ? 0 : -1, error);
- if (cbk != NULL) {
- cbk->uintptr[0] = heal->available;
- cbk->uintptr[1] = heal->good;
- cbk->uintptr[2] = heal->fixed;
+ ec_mask_to_char_array(ec->xl_up, up_subvols, ec->nodes);
- ec_combine(cbk, NULL);
-
- fop->answer = cbk;
- } else if (error == 0) {
- error = ENOMEM;
+ ret = cluster_open(ec->xl_list, up_subvols, ec->nodes, replies, output,
+ frame, ec->xl, &loc, O_RDWR | O_LARGEFILE, fd, NULL);
+ if (ret <= ec->fragments) {
+ ret = -ENOTCONN;
+ goto out;
}
- if (heal->lookup != NULL)
- {
- ec_fop_data_release(heal->lookup);
+ fd_bind(fd);
+ sprintf(selfheal_domain, "%s:self-heal", ec->xl->name);
+ /*If other processes are already doing the heal, don't block*/
+ if (block) {
+ ret = cluster_inodelk(ec->xl_list, output, ec->nodes, replies,
+ locked_on, frame, ec->xl, selfheal_domain, inode,
+ 0, 0);
+ } else {
+ ret = cluster_tiebreaker_inodelk(ec->xl_list, output, ec->nodes,
+ replies, locked_on, frame, ec->xl,
+ selfheal_domain, inode, 0, 0);
}
- if (heal->fd != NULL)
{
- fd_unref(heal->fd);
+ if (ret <= ec->fragments) {
+ gf_msg_debug(ec->xl->name, 0,
+ "%s: Skipping heal "
+ "as only %d number of subvolumes could "
+ "be locked",
+ uuid_utoa(inode->gfid), ret);
+ ret = -ENOTCONN;
+ goto unlock;
+ }
+ ret = __ec_heal_data(frame, ec, fd, locked_on, sources, healed_sinks);
}
- GF_FREE(heal->symlink);
- loc_wipe(&heal->loc);
-
- LOCK_DESTROY(&heal->lock);
+unlock:
+ cluster_uninodelk(ec->xl_list, locked_on, ec->nodes, replies, output, frame,
+ ec->xl, selfheal_domain, inode, 0, 0);
+out:
+ if (fd)
+ fd_unref(fd);
+ loc_wipe(&loc);
+ cluster_replies_wipe(replies, ec->nodes);
+ return ret;
+}
- GF_FREE(heal);
+int
+ec_heal_purge_stale_index(call_frame_t *frame, ec_t *ec, inode_t *inode)
+{
+ int i = 0;
+ int ret = 0;
+ dict_t **xattr = NULL;
+ loc_t loc = {0};
+ uint64_t dirty_xattr[EC_VERSION_SIZE] = {0};
+ unsigned char *on = NULL;
+ default_args_cbk_t *replies = NULL;
+ dict_t *dict = NULL;
+
+ /* Allocate the required memory */
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+ on = alloca0(ec->nodes);
+ EC_REPLIES_ALLOC(replies, ec->nodes);
+ xattr = GF_CALLOC(ec->nodes, sizeof(*xattr), gf_common_mt_pointer);
+ if (!xattr) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ dict = dict_new();
+ if (!dict) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ for (i = 0; i < ec->nodes; i++) {
+ xattr[i] = dict;
+ on[i] = 1;
+ }
+ ret = dict_set_static_bin(dict, EC_XATTR_DIRTY, dirty_xattr,
+ (sizeof(*dirty_xattr) * EC_VERSION_SIZE));
+ if (ret < 0) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ PARALLEL_FOP_ONLIST(ec->xl_list, on, ec->nodes, replies, frame,
+ ec_wind_xattrop_parallel, &loc, GF_XATTROP_ADD_ARRAY64,
+ xattr, NULL);
+out:
+ if (dict) {
+ dict_unref(dict);
+ }
+ if (xattr) {
+ GF_FREE(xattr);
+ }
+ cluster_replies_wipe(replies, ec->nodes);
+ loc_wipe(&loc);
+ return ret;
+}
- ec_fop_set_error(fop, error);
+void
+ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
+{
+ call_frame_t *frame = NULL;
+ unsigned char *participants = NULL;
+ unsigned char *msources = NULL;
+ unsigned char *mhealed_sinks = NULL;
+ unsigned char *sources = NULL;
+ unsigned char *healed_sinks = NULL;
+ ec_t *ec = NULL;
+ int ret = 0;
+ int op_ret = 0;
+ int op_errno = 0;
+ intptr_t mgood = 0;
+ intptr_t mbad = 0;
+ intptr_t good = 0;
+ intptr_t bad = 0;
+ uint32_t pending = 0;
+ ec_fop_data_t *fop = data;
+ gf_boolean_t blocking = _gf_false;
+ ec_heal_need_t need_heal = EC_HEAL_NONEED;
+ unsigned char *up_subvols = NULL;
+ char up_bricks[32];
+
+ ec = this->private;
+
+ /* If it is heal request from getxattr, complete the heal and then
+ * unwind, if it is ec_heal with NULL as frame then no need to block
+ * the heal as the caller doesn't care about its completion. In case
+ * of heald whichever gets tiebreaking inodelk will take care of the
+ * heal, so no need to block*/
+ if (fop->req_frame && !ec->shd.iamshd)
+ blocking = _gf_true;
+
+ frame = create_frame(this, this->ctx->pool);
+ if (!frame)
+ goto out;
- /* Resume all pending heal requests, setting the same data obtained by
- * this heal execution. */
- while (!list_empty(&list)) {
- heal = list_entry(list.next, ec_heal_t, list);
- list_del_init(&heal->list);
+ ec_owner_set(frame, frame->root);
+ /*Do heal as root*/
+ frame->root->uid = 0;
+ frame->root->gid = 0;
+ /*Mark the fops as internal*/
+ frame->root->pid = GF_CLIENT_PID_SELF_HEALD;
+ participants = alloca0(ec->nodes);
+ ec_mask_to_char_array(ec->xl_up, participants, ec->nodes);
+
+ up_subvols = alloca0(ec->nodes);
+ ec_mask_to_char_array(ec->xl_up, up_subvols, ec->nodes);
+
+ if (loc->name && strlen(loc->name)) {
+ ret = ec_heal_name(frame, ec, loc->parent, (char *)loc->name,
+ participants);
+ if (ret >= 0) {
+ gf_msg_debug(this->name, 0,
+ "%s: name heal "
+ "successful on %" PRIXPTR,
+ loc->path,
+ ec_char_array_to_mask(participants, ec->nodes));
+ } else {
+ gf_msg_debug(
+ this->name, 0,
+ "%s: name heal "
+ "failed. ret = %d, subvolumes up = %s",
+ loc->path, ret,
+ ec_bin(up_bricks, sizeof(up_bricks), ec->xl_up, ec->nodes));
+ }
+ }
- heal->available = cbk->uintptr[0];
- heal->good = cbk->uintptr[1];
- heal->fixed = cbk->uintptr[2];
+ /* Mount triggers heal only when it detects that it must need heal, shd
+ * triggers heals periodically which need not be thorough*/
+ if (ec->shd.iamshd && (ret <= 0)) {
+ ec_heal_inspect(frame, ec, loc->inode, up_subvols, _gf_false, _gf_false,
+ &need_heal);
+
+ if (need_heal == EC_HEAL_PURGE_INDEX) {
+ gf_msg(ec->xl->name, GF_LOG_INFO, 0, EC_MSG_HEAL_FAIL,
+ "Index entry needs to be purged for: %s ",
+ uuid_utoa(loc->gfid));
+ /* We need to send zero-xattrop so that stale index entry could be
+ * removed. We need not take lock on this entry to do so as
+ * xattrop on a brick is atomic. */
+ ec_heal_purge_stale_index(frame, ec, loc->inode);
+ goto out;
+ } else if (need_heal == EC_HEAL_NONEED) {
+ gf_msg(ec->xl->name, GF_LOG_DEBUG, 0, EC_MSG_HEAL_FAIL,
+ "Heal is not required for : %s ", uuid_utoa(loc->gfid));
+ goto out;
+ }
+ }
- /* Setting 'done' to 1 avoids executing all heal logic and directly
- * reports the result to the caller. */
- heal->done = 1;
+ sources = alloca0(ec->nodes);
+ healed_sinks = alloca0(ec->nodes);
+ if (IA_ISREG(loc->inode->ia_type)) {
+ ret = ec_heal_data(frame, ec, blocking, loc->inode, sources,
+ healed_sinks);
+ } else if (IA_ISDIR(loc->inode->ia_type) && !partial) {
+ ret = ec_heal_entry(frame, ec, loc->inode, sources, healed_sinks,
+ &pending);
+ } else {
+ ret = 0;
+ memcpy(sources, participants, ec->nodes);
+ memcpy(healed_sinks, participants, ec->nodes);
+ }
- ec_resume(heal->fop, error);
+ if (ret == 0) {
+ good = ec_char_array_to_mask(sources, ec->nodes);
+ bad = ec_char_array_to_mask(healed_sinks, ec->nodes);
+ } else {
+ op_ret = -1;
+ op_errno = -ret;
+ }
+ msources = alloca0(ec->nodes);
+ mhealed_sinks = alloca0(ec->nodes);
+ ret = ec_heal_metadata(frame, ec, loc->inode, msources, mhealed_sinks);
+ if (ret == 0) {
+ mgood = ec_char_array_to_mask(msources, ec->nodes);
+ mbad = ec_char_array_to_mask(mhealed_sinks, ec->nodes);
+ } else {
+ op_ret = -1;
+ op_errno = -ret;
}
- /* If there is a pending full request, resume it. */
- if (next != NULL) {
- ec_resume(next->fop, 0);
+out:
+ ec_reset_entry_healing(fop);
+ if (fop->cbks.heal) {
+ fop->cbks.heal(fop->req_frame, fop->data, fop->xl, op_ret, op_errno,
+ ec_char_array_to_mask(participants, ec->nodes),
+ mgood & good, mbad & bad, pending, NULL);
}
+ if (frame)
+ STACK_DESTROY(frame->root);
+
+ return;
}
-void ec_wind_heal(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+int
+ec_synctask_heal_wrap(void *opaque)
{
- ec_cbk_data_t * cbk;
- ec_heal_t *heal = fop->heal;
+ ec_fop_data_t *fop = opaque;
+ ec_heal_do(fop->xl, fop, &fop->loc[0], fop->int32);
+ return 0;
+}
- ec_trace("WIND", fop, "idx=%d", idx);
+int
+ec_heal_done(int ret, call_frame_t *heal, void *opaque)
+{
+ if (opaque)
+ ec_fop_data_release(opaque);
+ return 0;
+}
- cbk = ec_cbk_data_allocate(fop->frame, fop->xl, fop, EC_FOP_HEAL, idx,
- fop->error == 0 ? 0 : -1, fop->error);
- if (cbk != NULL)
- {
- cbk->uintptr[0] = heal->available;
- cbk->uintptr[1] = heal->good;
- cbk->uintptr[2] = heal->bad;
+ec_fop_data_t *
+__ec_dequeue_heals(ec_t *ec)
+{
+ ec_fop_data_t *fop = NULL;
+
+ if (list_empty(&ec->heal_waiting))
+ goto none;
+
+ if ((ec->background_heals > 0) && (ec->healers >= ec->background_heals))
+ goto none;
+
+ fop = list_entry(ec->heal_waiting.next, ec_fop_data_t, healer);
+ ec->heal_waiters--;
+ list_del_init(&fop->healer);
+ list_add(&fop->healer, &ec->healing);
+ ec->healers++;
+ return fop;
+none:
+ gf_msg_debug(ec->xl->name, 0, "Num healers: %d, Num Waiters: %d",
+ ec->healers, ec->heal_waiters);
+ return NULL;
+}
- ec_combine(cbk, NULL);
+void
+ec_heal_fail(ec_t *ec, ec_fop_data_t *fop)
+{
+ if (fop->cbks.heal) {
+ fop->cbks.heal(fop->req_frame, fop->data, ec->xl, -1, fop->error, 0, 0,
+ 0, 0, NULL);
}
-
- ec_complete(fop);
+ ec_fop_data_release(fop);
}
-int32_t
-ec_manager_heal (ec_fop_data_t * fop, int32_t state)
+void
+ec_launch_heal(ec_t *ec, ec_fop_data_t *fop)
{
- ec_cbk_data_t * cbk;
- ec_heal_t *heal = fop->heal;
+ int ret = 0;
+ call_frame_t *frame = NULL;
- switch (state)
- {
- case EC_STATE_INIT:
- ec_owner_set(fop->frame, fop->frame->root);
+ frame = create_frame(ec->xl, ec->xl->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
- fop->error = ec_heal_init(fop);
- if (fop->error != 0)
- {
- return EC_STATE_REPORT;
- }
+ ec_owner_set(frame, frame->root);
+ /*Do heal as root*/
+ frame->root->uid = 0;
+ frame->root->gid = 0;
+ /*Mark the fops as internal*/
+ frame->root->pid = GF_CLIENT_PID_SELF_HEALD;
- heal = fop->heal;
- /* root loc doesn't have pargfid/parent */
- if (loc_is_root (&heal->loc) ||
- !gf_uuid_is_null(heal->loc.pargfid) || heal->loc.parent) {
- heal->nameheal = _gf_true;
- return EC_STATE_DISPATCH;
- } else {
- /* No need to perform 'name' heal.*/
- return EC_STATE_HEAL_PRE_INODELK_LOCK;
- }
+ ret = synctask_new(ec->xl->ctx->env, ec_synctask_heal_wrap, ec_heal_done,
+ frame, fop);
+out:
+ if (ret < 0) {
+ ec_fop_set_error(fop, ENOMEM);
+ ec_heal_fail(ec, fop);
+ }
- case EC_STATE_DISPATCH:
- if (heal->done) {
- return EC_STATE_HEAL_DISPATCH;
- }
+ if (frame)
+ STACK_DESTROY(frame->root);
+}
- ec_heal_entrylk(heal, ENTRYLK_LOCK);
+void
+ec_handle_healers_done(ec_fop_data_t *fop)
+{
+ ec_t *ec = fop->xl->private;
+ ec_fop_data_t *heal_fop = NULL;
- return EC_STATE_HEAL_ENTRY_LOOKUP;
+ if (list_empty(&fop->healer))
+ return;
- case EC_STATE_HEAL_ENTRY_LOOKUP:
- ec_lookup(fop->frame, heal->xl, fop->mask, EC_MINIMUM_MIN,
- ec_heal_entry_lookup_cbk, heal, &heal->loc, NULL);
+ LOCK(&ec->lock);
- return EC_STATE_HEAL_ENTRY_PREPARE;
+ list_del_init(&fop->healer);
- case EC_STATE_HEAL_ENTRY_PREPARE:
- if (!heal->partial || (heal->iatt.ia_type == IA_IFDIR)) {
- ec_heal_prepare(heal);
- }
+ do {
+ ec->healers--;
+ heal_fop = __ec_dequeue_heals(ec);
- if (heal->partial) {
- return EC_STATE_HEAL_UNLOCK_ENTRY;
- }
+ if ((heal_fop != NULL) && ec->shutdown) {
+ /* This will prevent ec_handle_healers_done() to be
+ * called recursively. That would be problematic if
+ * the queue is too big. */
+ list_del_init(&heal_fop->healer);
- return EC_STATE_HEAL_PRE_INODELK_LOCK;
+ UNLOCK(&ec->lock);
- case EC_STATE_HEAL_PRE_INODELK_LOCK:
- if (heal->done)
- return EC_STATE_HEAL_DISPATCH;
+ ec_fop_set_error(fop, ENOTCONN);
+ ec_heal_fail(ec, heal_fop);
- /* Only heal data/metadata if enough information is supplied. */
- if (gf_uuid_is_null(heal->loc.gfid))
- {
- ec_heal_entrylk(heal, ENTRYLK_UNLOCK);
+ LOCK(&ec->lock);
+ }
+ } while ((heal_fop != NULL) && ec->shutdown);
- return EC_STATE_HEAL_DISPATCH;
- }
+ UNLOCK(&ec->lock);
- ec_heal_inodelk(heal, F_WRLCK, 0, 0, 0);
+ if (heal_fop)
+ ec_launch_heal(ec, heal_fop);
+}
- return EC_STATE_HEAL_PRE_INODE_LOOKUP;
+gf_boolean_t
+ec_is_entry_healing(ec_fop_data_t *fop)
+{
+ ec_inode_t *ctx = NULL;
+ int32_t heal_count = 0;
+ loc_t *loc = NULL;
- case EC_STATE_HEAL_PRE_INODE_LOOKUP:
- ec_heal_lookup(heal, heal->fop->mask);
+ loc = &fop->loc[0];
- return EC_STATE_HEAL_XATTRIBUTES_REMOVE;
+ LOCK(&loc->inode->lock);
+ {
+ ctx = __ec_inode_get(loc->inode, fop->xl);
+ if (ctx) {
+ heal_count = ctx->heal_count;
+ }
+ }
+ UNLOCK(&loc->inode->lock);
+ GF_ASSERT(heal_count >= 0);
+ return heal_count;
+}
- case EC_STATE_HEAL_XATTRIBUTES_REMOVE:
- ec_heal_removexattr_others(heal);
+void
+ec_heal_throttle(xlator_t *this, ec_fop_data_t *fop)
+{
+ gf_boolean_t can_heal = _gf_true;
+ ec_t *ec = this->private;
+ ec_fop_data_t *fop_rel = NULL;
- return EC_STATE_HEAL_XATTRIBUTES_SET;
+ if (fop->req_frame == NULL) {
+ LOCK(&ec->lock);
+ {
+ if ((ec->background_heals > 0) &&
+ (ec->heal_wait_qlen + ec->background_heals) >
+ (ec->heal_waiters + ec->healers)) {
+ if (!ec_is_entry_healing(fop)) {
+ list_add_tail(&fop->healer, &ec->heal_waiting);
+ ec->heal_waiters++;
+ ec_set_entry_healing(fop);
+ } else {
+ fop_rel = fop;
+ }
+ fop = __ec_dequeue_heals(ec);
+ } else {
+ can_heal = _gf_false;
+ }
+ }
+ UNLOCK(&ec->lock);
+ }
- case EC_STATE_HEAL_XATTRIBUTES_SET:
- ec_heal_setxattr_others(heal);
+ if (can_heal) {
+ if (fop) {
+ if (fop->req_frame != NULL) {
+ ec_set_entry_healing(fop);
+ }
+ ec_launch_heal(ec, fop);
+ }
+ } else {
+ gf_msg_debug(this->name, 0,
+ "Max number of heals are "
+ "pending, background self-heal rejected");
+ ec_fop_set_error(fop, EBUSY);
+ ec_heal_fail(ec, fop);
+ }
+ if (fop_rel) {
+ ec_heal_done(0, NULL, fop_rel);
+ }
+}
- return EC_STATE_HEAL_ATTRIBUTES;
+void
+ec_heal(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_heal_cbk_t func, void *data, loc_t *loc,
+ int32_t partial, dict_t *xdata)
+{
+ ec_cbk_t callback = {.heal = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t err = EINVAL;
- case EC_STATE_HEAL_ATTRIBUTES:
- ec_heal_attr(heal);
+ gf_msg_trace("ec", 0, "EC(HEAL) %p", frame);
- return EC_STATE_HEAL_OPEN;
+ VALIDATE_OR_GOTO(this, fail);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, fail);
- case EC_STATE_HEAL_OPEN:
- ec_heal_open(heal);
+ if (!loc || !loc->inode || gf_uuid_is_null(loc->inode->gfid))
+ goto fail;
- return EC_STATE_HEAL_REOPEN_FD;
+ if (frame && frame->local)
+ goto fail;
+ fop = ec_fop_data_allocate(frame, this, EC_FOP_HEAL, 0, target, fop_flags,
+ NULL, NULL, callback, data);
- case EC_STATE_HEAL_REOPEN_FD:
- ec_heal_reopen_fd(heal);
+ err = ENOMEM;
- return EC_STATE_HEAL_UNLOCK;
+ if (fop == NULL)
+ goto fail;
- case -EC_STATE_HEAL_XATTRIBUTES_REMOVE:
- case -EC_STATE_HEAL_XATTRIBUTES_SET:
- case -EC_STATE_HEAL_ATTRIBUTES:
- case -EC_STATE_HEAL_OPEN:
- case -EC_STATE_HEAL_REOPEN_FD:
- case -EC_STATE_HEAL_UNLOCK:
- case EC_STATE_HEAL_UNLOCK:
- ec_heal_inodelk(heal, F_UNLCK, 0, 0, 0);
+ fop->int32 = partial;
- /* Fall through */
+ if (loc) {
+ if (loc_copy(&fop->loc[0], loc) != 0)
+ goto fail;
+ }
- case -EC_STATE_HEAL_ENTRY_PREPARE:
- case -EC_STATE_HEAL_PRE_INODELK_LOCK:
- case -EC_STATE_HEAL_PRE_INODE_LOOKUP:
- case -EC_STATE_HEAL_UNLOCK_ENTRY:
- case EC_STATE_HEAL_UNLOCK_ENTRY:
- if (heal->nameheal)
- ec_heal_entrylk(heal, ENTRYLK_UNLOCK);
+ if (xdata)
+ fop->xdata = dict_ref(xdata);
- heal->bad = ec_heal_needs_data_rebuild(heal);
- if (heal->bad != 0)
- {
- return EC_STATE_HEAL_DATA_LOCK;
- }
+ ec_heal_throttle(this, fop);
- return EC_STATE_HEAL_DISPATCH;
+ return;
- case EC_STATE_HEAL_DATA_LOCK:
- if (heal->done)
- {
- return EC_STATE_HEAL_POST_INODELK_LOCK;
- }
+fail:
+ if (fop)
+ ec_fop_data_release(fop);
+ if (func)
+ func(frame, data, this, -1, err, 0, 0, 0, 0, NULL);
+}
- ec_heal_inodelk(heal, F_WRLCK, 1, heal->offset, heal->size);
+int
+ec_replace_heal_done(int ret, call_frame_t *heal, void *opaque)
+{
+ ec_t *ec = opaque;
+ gf_boolean_t last_fop = _gf_false;
- return EC_STATE_HEAL_DATA_COPY;
+ if (GF_ATOMIC_DEC(ec->async_fop_count) == 0) {
+ LOCK(&ec->lock);
+ {
+ last_fop = __ec_is_last_fop(ec);
+ }
+ UNLOCK(&ec->lock);
+ }
+ gf_msg_debug(ec->xl->name, 0, "getxattr on bricks is done ret %d", ret);
- case EC_STATE_HEAL_DATA_COPY:
- ec_heal_data(heal);
+ if (last_fop)
+ ec_pending_fops_completed(ec);
- return EC_STATE_HEAL_DATA_UNLOCK;
+ return 0;
+}
- case -EC_STATE_HEAL_DATA_COPY:
- case -EC_STATE_HEAL_DATA_UNLOCK:
- case EC_STATE_HEAL_DATA_UNLOCK:
- ec_heal_inodelk(heal, F_UNLCK, 1, heal->offset, heal->size);
+int32_t
+ec_replace_heal(ec_t *ec, inode_t *inode)
+{
+ loc_t loc = {0};
+ int ret = 0;
+
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+ ret = syncop_getxattr(ec->xl, &loc, NULL, EC_XATTR_HEAL, NULL, NULL);
+ if (ret < 0)
+ gf_msg_debug(ec->xl->name, 0, "Heal failed for replace brick ret = %d",
+ ret);
+
+ /* Once the root inode has been checked, it might have triggered a
+ * self-heal on it after a replace brick command or for some other
+ * reason. It can also happen that the volume already had damaged
+ * files in the index, even if the heal on the root directory failed.
+ * In both cases we need to wake all index healers to continue
+ * healing remaining entries that are marked as dirty. */
+ ec_shd_index_healer_wake(ec);
- heal->offset += heal->size;
+ loc_wipe(&loc);
+ return ret;
+}
- return EC_STATE_HEAL_DATA_LOCK;
+int32_t
+ec_replace_brick_heal_wrap(void *opaque)
+{
+ ec_t *ec = opaque;
+ inode_table_t *itable = NULL;
+ int32_t ret = -1;
- case EC_STATE_HEAL_POST_INODELK_LOCK:
- ec_heal_inodelk(heal, F_WRLCK, 1, 0, 0);
+ if (ec->xl->itable)
+ itable = ec->xl->itable;
+ else
+ goto out;
- return EC_STATE_HEAL_POST_INODE_LOOKUP;
+ if (xlator_is_cleanup_starting(ec->xl))
+ goto out;
- case EC_STATE_HEAL_POST_INODE_LOOKUP:
- heal->fixed = heal->bad;
- ec_heal_update_dirty(heal, heal->bad);
- ec_heal_lookup(heal, heal->good);
+ ret = ec_replace_heal(ec, itable->root);
+out:
+ return ret;
+}
- return EC_STATE_HEAL_SETATTR;
+int32_t
+ec_launch_replace_heal(ec_t *ec)
+{
+ int ret = -1;
- case EC_STATE_HEAL_SETATTR:
- ec_setattr(heal->fop->frame, heal->xl, heal->fixed, EC_MINIMUM_ONE,
- ec_heal_setattr_cbk, heal, &heal->loc, &heal->iatt,
- GF_SET_ATTR_MODE | GF_SET_ATTR_UID | GF_SET_ATTR_GID |
- GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME, NULL);
+ ret = synctask_new(ec->xl->ctx->env, ec_replace_brick_heal_wrap,
+ ec_replace_heal_done, NULL, ec);
- return EC_STATE_HEAL_POST_INODELK_UNLOCK;
+ if (ret < 0) {
+ gf_msg_debug(ec->xl->name, 0, "Heal failed for replace brick ret = %d",
+ ret);
+ ec_replace_heal_done(-1, NULL, ec);
+ }
- case -EC_STATE_HEAL_SETATTR:
- case -EC_STATE_HEAL_POST_INODELK_UNLOCK:
- case EC_STATE_HEAL_POST_INODELK_UNLOCK:
- ec_heal_inodelk(heal, F_UNLCK, 1, 0, 0);
+ return ret;
+}
- return EC_STATE_HEAL_DISPATCH;
-
- case -EC_STATE_HEAL_POST_INODELK_LOCK:
- case -EC_STATE_HEAL_POST_INODE_LOOKUP:
- case -EC_STATE_HEAL_ENTRY_LOOKUP:
- case -EC_STATE_HEAL_DATA_LOCK:
- case -EC_STATE_HEAL_DISPATCH:
- case EC_STATE_HEAL_DISPATCH:
- ec_heal_dispatch(heal);
-
- return EC_STATE_PREPARE_ANSWER;
-
- case EC_STATE_PREPARE_ANSWER:
- cbk = fop->answer;
- if (cbk != NULL)
- {
- if (!ec_dict_combine(cbk, EC_COMBINE_XDATA))
- {
- if (cbk->op_ret >= 0)
- {
- cbk->op_ret = -1;
- cbk->op_errno = EIO;
- }
- }
- if (cbk->op_ret < 0)
- {
- ec_fop_set_error(fop, cbk->op_errno);
- }
- }
- else
- {
- ec_fop_set_error(fop, EIO);
- }
+int32_t
+ec_set_heal_info(dict_t **dict_rsp, char *status)
+{
+ dict_t *dict = NULL;
+ int ret = 0;
- return EC_STATE_REPORT;
+ dict = dict_new();
+ if (!dict) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ ret = dict_set_str(dict, "heal-info", status);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_WARNING, -ret, EC_MSG_HEAL_FAIL,
+ "Failed to set heal-info key to "
+ "%s",
+ status);
+ dict_unref(dict);
+ dict = NULL;
+ }
+ *dict_rsp = dict;
+out:
+ return ret;
+}
- case EC_STATE_REPORT:
- cbk = fop->answer;
-
- GF_ASSERT(cbk != NULL);
-
- if (fop->id == EC_FOP_HEAL)
- {
- if (fop->cbks.heal != NULL)
- {
- fop->cbks.heal(fop->req_frame, fop, fop->xl, cbk->op_ret,
- cbk->op_errno, cbk->uintptr[0],
- cbk->uintptr[1], cbk->uintptr[2],
- cbk->xdata);
- }
- }
- else
- {
- if (fop->cbks.fheal != NULL)
- {
- fop->cbks.fheal(fop->req_frame, fop, fop->xl, cbk->op_ret,
- cbk->op_errno, cbk->uintptr[0],
- cbk->uintptr[1], cbk->uintptr[2],
- cbk->xdata);
+static int32_t
+_need_heal_calculate(ec_t *ec, uint64_t *dirty, unsigned char *sources,
+ gf_boolean_t self_locked, int32_t lock_count,
+ ec_heal_need_t *need_heal, uint64_t *versions)
+{
+ int i = 0;
+ int source_count = 0;
+
+ source_count = EC_COUNT(sources, ec->nodes);
+ if (source_count == ec->nodes) {
+ *need_heal = EC_HEAL_NONEED;
+ if (self_locked || lock_count == 0) {
+ for (i = 0; i < ec->nodes; i++) {
+ if (dirty[i] || (versions[i] != versions[0])) {
+ *need_heal = EC_HEAL_MUST;
+ goto out;
}
}
-
- return EC_STATE_END;
-
- case -EC_STATE_INIT:
- case -EC_STATE_DISPATCH:
- case -EC_STATE_PREPARE_ANSWER:
- case -EC_STATE_REPORT:
- GF_ASSERT(fop->error != 0);
-
- if (fop->id == EC_FOP_HEAL)
- {
- if (fop->cbks.heal != NULL)
- {
- fop->cbks.heal(fop->req_frame, fop, fop->xl, -1,
- fop->error, 0, 0, 0, NULL);
+ /* If lock count is 0, all dirty flags are 0 and all the
+ * versions are macthing then why are we here. It looks
+ * like something went wrong while removing the index entries
+ * after completing a successful heal or fop. In this case
+ * we need to remove this index entry to avoid triggering heal
+ * in a loop and causing lookups again and again*/
+ *need_heal = EC_HEAL_PURGE_INDEX;
+ } else {
+ for (i = 0; i < ec->nodes; i++) {
+ /* Since each lock can only increment the dirty
+ * count once, if dirty is > 1 it means that
+ * another operation has left the dirty count
+ * set and this indicates a problem in the
+ * inode.*/
+ if (dirty[i] > 1) {
+ *need_heal = EC_HEAL_MUST;
+ goto out;
}
- }
- else
- {
- if (fop->cbks.fheal != NULL)
- {
- fop->cbks.fheal(fop->req_frame, fop, fop->xl, -1,
- fop->error, 0, 0, 0, NULL);
+ if (dirty[i] != dirty[0] || (versions[i] != versions[0])) {
+ *need_heal = EC_HEAL_MAYBE;
}
}
+ }
+ } else {
+ *need_heal = EC_HEAL_MUST;
+ }
- return EC_STATE_END;
+out:
+ return source_count;
+}
- default:
- gf_log(fop->xl->name, GF_LOG_ERROR, "Unhandled state %d for %s",
- state, ec_fop_name(fop->id));
+static int32_t
+ec_need_metadata_heal(ec_t *ec, inode_t *inode, default_args_cbk_t *replies,
+ int32_t lock_count, gf_boolean_t self_locked,
+ gf_boolean_t thorough, ec_heal_need_t *need_heal)
+{
+ uint64_t *dirty = NULL;
+ unsigned char *sources = NULL;
+ unsigned char *healed_sinks = NULL;
+ uint64_t *meta_versions = NULL;
+ int ret = 0;
+
+ sources = alloca0(ec->nodes);
+ healed_sinks = alloca0(ec->nodes);
+ dirty = alloca0(ec->nodes * sizeof(*dirty));
+ meta_versions = alloca0(ec->nodes * sizeof(*meta_versions));
+ ret = ec_heal_metadata_find_direction(ec, replies, meta_versions, dirty,
+ sources, healed_sinks);
+ if (ret < 0 && ret != -EIO) {
+ goto out;
+ }
- return EC_STATE_END;
+ ret = _need_heal_calculate(ec, dirty, sources, self_locked, lock_count,
+ need_heal, meta_versions);
+out:
+ return ret;
+}
+
+static int32_t
+ec_need_data_heal(ec_t *ec, inode_t *inode, default_args_cbk_t *replies,
+ int32_t lock_count, gf_boolean_t self_locked,
+ gf_boolean_t thorough, ec_heal_need_t *need_heal)
+{
+ uint64_t *dirty = NULL;
+ unsigned char *sources = NULL;
+ unsigned char *healed_sinks = NULL;
+ uint64_t *data_versions = NULL;
+ uint64_t *size = NULL;
+ int ret = 0;
+
+ sources = alloca0(ec->nodes);
+ healed_sinks = alloca0(ec->nodes);
+ dirty = alloca0(ec->nodes * sizeof(*dirty));
+ data_versions = alloca0(ec->nodes * sizeof(*data_versions));
+ size = alloca0(ec->nodes * sizeof(*size));
+
+ /* When dd is going on and heal info is called there is a very good
+ * chance for on disk sizes to mismatch even though nothing is wrong
+ * we don't need ondisk size check there. But if the file is either
+ * self-locked or the caller wants a thorough check then make sure to
+ * perform on disk check also. */
+ ret = ec_heal_data_find_direction(
+ ec, replies, data_versions, dirty, size, sources, healed_sinks,
+ self_locked || thorough, EC_COMBINE_XDATA);
+ if (ret < 0 && ret != -EIO) {
+ goto out;
}
+
+ ret = _need_heal_calculate(ec, dirty, sources, self_locked, lock_count,
+ need_heal, data_versions);
+out:
+ return ret;
}
-void ec_heal(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_heal_cbk_t func, void * data, loc_t * loc,
- int32_t partial, dict_t *xdata)
+static int32_t
+ec_need_entry_heal(ec_t *ec, inode_t *inode, default_args_cbk_t *replies,
+ int32_t lock_count, gf_boolean_t self_locked,
+ gf_boolean_t thorough, ec_heal_need_t *need_heal)
{
- ec_cbk_t callback = { .heal = func };
- ec_fop_data_t * fop = NULL;
- int32_t error = EIO;
+ uint64_t *dirty = NULL;
+ unsigned char *sources = NULL;
+ unsigned char *healed_sinks = NULL;
+ uint64_t *data_versions = NULL;
+ int ret = 0;
+
+ sources = alloca0(ec->nodes);
+ healed_sinks = alloca0(ec->nodes);
+ dirty = alloca0(ec->nodes * sizeof(*dirty));
+ data_versions = alloca0(ec->nodes * sizeof(*data_versions));
+
+ ret = ec_heal_entry_find_direction(ec, replies, data_versions, dirty,
+ sources, healed_sinks);
+ if (ret < 0 && ret != -EIO) {
+ goto out;
+ }
- gf_log("ec", GF_LOG_TRACE, "EC(HEAL) %p", frame);
+ ret = _need_heal_calculate(ec, dirty, sources, self_locked, lock_count,
+ need_heal, data_versions);
+out:
+ return ret;
+}
- VALIDATE_OR_GOTO(this, out);
- GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+static int32_t
+ec_need_heal(ec_t *ec, inode_t *inode, default_args_cbk_t *replies,
+ int32_t lock_count, gf_boolean_t self_locked,
+ gf_boolean_t thorough, ec_heal_need_t *need_heal)
+{
+ int ret = 0;
- fop = ec_fop_data_allocate(frame, this, EC_FOP_HEAL,
- EC_FLAG_UPDATE_LOC_INODE, target, minimum,
- ec_wind_heal, ec_manager_heal, callback, data);
- if (fop == NULL)
- {
+ ret = ec_need_metadata_heal(ec, inode, replies, lock_count, self_locked,
+ thorough, need_heal);
+ if (ret < 0)
goto out;
+
+ if (*need_heal == EC_HEAL_MUST)
+ goto out;
+
+ if (inode->ia_type == IA_IFREG) {
+ ret = ec_need_data_heal(ec, inode, replies, lock_count, self_locked,
+ thorough, need_heal);
+ } else if (inode->ia_type == IA_IFDIR) {
+ ret = ec_need_entry_heal(ec, inode, replies, lock_count, self_locked,
+ thorough, need_heal);
}
- fop->int32 = partial;
+out:
+ return ret;
+}
- if (loc != NULL)
- {
- if (loc_copy(&fop->loc[0], loc) != 0)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to copy a location.");
+int32_t
+ec_heal_inspect(call_frame_t *frame, ec_t *ec, inode_t *inode,
+ unsigned char *locked_on, gf_boolean_t self_locked,
+ gf_boolean_t thorough, ec_heal_need_t *need_heal)
+{
+ loc_t loc = {0};
+ int i = 0;
+ int ret = 0;
+ dict_t *xdata = NULL;
+ uint64_t zero_array[2] = {0};
+ uint64_t zero_value = 0;
+ unsigned char *output = NULL;
+ default_args_cbk_t *replies = NULL;
+ int32_t lock_count = 0;
+
+ EC_REPLIES_ALLOC(replies, ec->nodes);
+ output = alloca0(ec->nodes);
+
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
- goto out;
- }
+ xdata = dict_new();
+ if (!xdata ||
+ dict_set_static_bin(xdata, EC_XATTR_VERSION, zero_array,
+ sizeof(zero_array)) ||
+ dict_set_static_bin(xdata, EC_XATTR_DIRTY, zero_array,
+ sizeof(zero_array)) ||
+ dict_set_static_bin(xdata, EC_XATTR_SIZE, &zero_value,
+ sizeof(zero_value))) {
+ ret = -ENOMEM;
+ goto out;
}
- if (xdata != NULL)
- {
- fop->xdata = dict_ref(xdata);
- if (fop->xdata == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (!self_locked) {
+ ret = dict_set_str(xdata, GLUSTERFS_INODELK_DOM_COUNT, ec->xl->name);
+ if (ret) {
+ ret = -ENOMEM;
goto out;
}
}
- error = 0;
+ ret = cluster_lookup(ec->xl_list, locked_on, ec->nodes, replies, output,
+ frame, ec->xl, &loc, xdata);
-out:
- if (fop != NULL)
- {
- ec_manager(fop, error);
+ if (ret != ec->nodes) {
+ ret = ec->nodes;
+ *need_heal = EC_HEAL_MUST;
+ goto out;
}
- else
- {
- func(frame, NULL, this, -1, EIO, 0, 0, 0, NULL);
+
+ if (self_locked)
+ goto need_heal;
+
+ for (i = 0; i < ec->nodes; i++) {
+ if (!output[i] || !replies[i].xdata) {
+ continue;
+ }
+ if ((dict_get_int32(replies[i].xdata, GLUSTERFS_INODELK_COUNT,
+ &lock_count) == 0) &&
+ lock_count > 0) {
+ break;
+ }
+ }
+need_heal:
+ ret = ec_need_heal(ec, inode, replies, lock_count, self_locked, thorough,
+ need_heal);
+out:
+ cluster_replies_wipe(replies, ec->nodes);
+ loc_wipe(&loc);
+ if (xdata) {
+ dict_unref(xdata);
}
+ return ret;
}
-/* FOP: fheal */
+int32_t
+ec_heal_locked_inspect(call_frame_t *frame, ec_t *ec, inode_t *inode,
+ ec_heal_need_t *need_heal)
+{
+ unsigned char *locked_on = NULL;
+ unsigned char *up_subvols = NULL;
+ unsigned char *output = NULL;
+ default_args_cbk_t *replies = NULL;
+ int ret = 0;
+
+ EC_REPLIES_ALLOC(replies, ec->nodes);
+ locked_on = alloca0(ec->nodes);
+ output = alloca0(ec->nodes);
+ up_subvols = alloca0(ec->nodes);
+ ec_mask_to_char_array(ec->xl_up, up_subvols, ec->nodes);
+
+ ret = cluster_inodelk(ec->xl_list, up_subvols, ec->nodes, replies,
+ locked_on, frame, ec->xl, ec->xl->name, inode, 0, 0);
+ if (ret != ec->nodes) {
+ *need_heal = EC_HEAL_MUST;
+ goto unlock;
+ }
+ ret = ec_heal_inspect(frame, ec, inode, locked_on, _gf_true, _gf_true,
+ need_heal);
+unlock:
+ cluster_uninodelk(ec->xl_list, locked_on, ec->nodes, replies, output, frame,
+ ec->xl, ec->xl->name, inode, 0, 0);
+ cluster_replies_wipe(replies, ec->nodes);
+ return ret;
+}
-void ec_wind_fheal(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+int32_t
+ec_get_heal_info(xlator_t *this, loc_t *entry_loc, dict_t **dict_rsp)
{
- ec_cbk_data_t * cbk;
- ec_heal_t *heal = fop->heal;
+ int ret = -ENOMEM;
+ ec_heal_need_t need_heal = EC_HEAL_NONEED;
+ call_frame_t *frame = NULL;
+ ec_t *ec = NULL;
+ unsigned char *up_subvols = NULL;
+ loc_t loc = {
+ 0,
+ };
- ec_trace("WIND", fop, "idx=%d", idx);
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, entry_loc, out);
- cbk = ec_cbk_data_allocate(fop->frame, fop->xl, fop, EC_FOP_FHEAL, idx,
- fop->error == 0 ? 0 : -1, fop->error);
- if (cbk != NULL)
- {
- cbk->uintptr[0] = heal->available;
- cbk->uintptr[1] = heal->good;
- cbk->uintptr[2] = heal->bad;
+ ec = this->private;
+ up_subvols = alloca0(ec->nodes);
+ ec_mask_to_char_array(ec->xl_up, up_subvols, ec->nodes);
- ec_combine(cbk, NULL);
+ if (EC_COUNT(up_subvols, ec->nodes) != ec->nodes) {
+ need_heal = EC_HEAL_MUST;
+ goto set_heal;
}
+ frame = create_frame(this, this->ctx->pool);
+ if (!frame) {
+ goto out;
+ }
+ ec_owner_set(frame, frame->root);
+ frame->root->uid = 0;
+ frame->root->gid = 0;
+ frame->root->pid = GF_CLIENT_PID_SELF_HEALD;
- ec_complete(fop);
-}
-
-void ec_fheal(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_fheal_cbk_t func, void * data, fd_t * fd,
- int32_t partial, dict_t *xdata)
-{
- ec_fd_t * ctx = ec_fd_get(fd, this);
+ if (loc_copy(&loc, entry_loc) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL,
+ "Failed to copy a location.");
+ goto out;
+ }
+ if (!loc.inode) {
+ ret = syncop_inode_find(this, this, loc.gfid, &loc.inode, NULL, NULL);
+ if (ret < 0)
+ goto out;
+ }
- if (ctx != NULL)
- {
- gf_log("ec", GF_LOG_DEBUG, "FHEAL ctx: flags=%X, open=%lX, bad=%lX",
- ctx->flags, ctx->open, ctx->bad);
- ec_heal(frame, this, target, minimum, func, data, &ctx->loc, partial,
- xdata);
+ ret = ec_heal_inspect(frame, ec, loc.inode, up_subvols, _gf_false,
+ _gf_false, &need_heal);
+ if (ret == ec->nodes && need_heal != EC_HEAL_MAYBE) {
+ goto set_heal;
+ }
+ need_heal = EC_HEAL_NONEED;
+ ret = ec_heal_locked_inspect(frame, ec, loc.inode, &need_heal);
+ if (ret < 0)
+ goto out;
+set_heal:
+ if (need_heal == EC_HEAL_MUST) {
+ ret = ec_set_heal_info(dict_rsp, "heal");
+ } else {
+ ret = ec_set_heal_info(dict_rsp, "no-heal");
}
+out:
+ if (frame) {
+ STACK_DESTROY(frame->root);
+ }
+ loc_wipe(&loc);
+ return ret;
}
diff --git a/xlators/cluster/ec/src/ec-heald.c b/xlators/cluster/ec/src/ec-heald.c
index 53b3996590c..5c1586bc9c5 100644
--- a/xlators/cluster/ec/src/ec-heald.c
+++ b/xlators/cluster/ec/src/ec-heald.c
@@ -8,592 +8,674 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
-#include "compat-errno.h"
+#include <glusterfs/defaults.h>
+#include <glusterfs/compat-errno.h>
#include "ec.h"
+#include "ec-messages.h"
#include "ec-heald.h"
#include "ec-mem-types.h"
-#include "syncop.h"
-#include "syncop-utils.h"
+#include <glusterfs/syncop.h>
+#include <glusterfs/syncop-utils.h>
#include "protocol-common.h"
-#define SHD_INODE_LRU_LIMIT 2048
-#define ASSERT_LOCAL(this, healer) \
- do { \
- if (!ec_shd_is_subvol_local (this, healer->subvol)) { \
- healer->local = _gf_false; \
- if (safe_break (healer)) { \
- break; \
- } else { \
- continue; \
- } \
- } else { \
- healer->local = _gf_true; \
- } \
- } while (0);
-
-
-#define NTH_INDEX_HEALER(this, n) (&((((ec_t *)this->private))->shd.index_healers[n]))
-#define NTH_FULL_HEALER(this, n) (&((((ec_t *)this->private))->shd.full_healers[n]))
+#define NTH_INDEX_HEALER(this, n) \
+ (&((((ec_t *)this->private))->shd.index_healers[n]))
+#define NTH_FULL_HEALER(this, n) \
+ (&((((ec_t *)this->private))->shd.full_healers[n]))
gf_boolean_t
-ec_shd_is_subvol_local (xlator_t *this, int subvol)
+ec_shd_is_subvol_local(xlator_t *this, int subvol)
{
- ec_t *ec = NULL;
- gf_boolean_t is_local = _gf_false;
- loc_t loc = {0, };
-
- ec = this->private;
- loc.inode = this->itable->root;
- syncop_is_subvol_local (ec->xl_list[subvol], &loc, &is_local);
- return is_local;
+ ec_t *ec = NULL;
+ gf_boolean_t is_local = _gf_false;
+ loc_t loc = {
+ 0,
+ };
+
+ ec = this->private;
+ loc.inode = this->itable->root;
+ syncop_is_subvol_local(ec->xl_list[subvol], &loc, &is_local);
+ return is_local;
}
char *
-ec_subvol_name (xlator_t *this, int subvol)
+ec_subvol_name(xlator_t *this, int subvol)
{
- ec_t *ec = NULL;
+ ec_t *ec = NULL;
- ec = this->private;
- if (subvol < 0 || subvol > ec->nodes)
- return NULL;
+ ec = this->private;
+ if (subvol < 0 || subvol > ec->nodes)
+ return NULL;
- return ec->xl_list[subvol]->name;
+ return ec->xl_list[subvol]->name;
}
int
-__ec_shd_healer_wait (struct subvol_healer *healer)
+__ec_shd_healer_wait(struct subvol_healer *healer)
{
- ec_t *ec = NULL;
- struct timespec wait_till = {0, };
- int ret = 0;
+ ec_t *ec = NULL;
+ struct timespec wait_till = {
+ 0,
+ };
+ int ret = 0;
- ec = healer->this->private;
+ ec = healer->this->private;
disabled_loop:
- wait_till.tv_sec = time (NULL) + 60;
-
- while (!healer->rerun) {
- ret = pthread_cond_timedwait (&healer->cond,
- &healer->mutex,
- &wait_till);
- if (ret == ETIMEDOUT)
- break;
- }
+ wait_till.tv_sec = gf_time() + ec->shd.timeout;
- ret = healer->rerun;
- healer->rerun = 0;
+ while (!healer->rerun) {
+ ret = pthread_cond_timedwait(&healer->cond, &healer->mutex, &wait_till);
+ if (ret == ETIMEDOUT)
+ break;
+ }
- if (!ec->shd.enabled || !ec->up)
- goto disabled_loop;
+ if (ec->shutdown) {
+ healer->running = _gf_false;
+ return -1;
+ }
- return ret;
-}
+ ret = healer->rerun;
+ healer->rerun = 0;
+ if (!ec->shd.enabled || !ec->up)
+ goto disabled_loop;
+
+ return ret;
+}
int
-ec_shd_healer_wait (struct subvol_healer *healer)
+ec_shd_healer_wait(struct subvol_healer *healer)
{
- int ret = 0;
+ int ret = 0;
- pthread_mutex_lock (&healer->mutex);
- {
- ret = __ec_shd_healer_wait (healer);
- }
- pthread_mutex_unlock (&healer->mutex);
+ pthread_mutex_lock(&healer->mutex);
+ {
+ ret = __ec_shd_healer_wait(healer);
+ }
+ pthread_mutex_unlock(&healer->mutex);
- return ret;
+ return ret;
}
-
-gf_boolean_t
-safe_break (struct subvol_healer *healer)
+int
+ec_shd_index_inode(xlator_t *this, xlator_t *subvol, inode_t **inode)
{
- gf_boolean_t ret = _gf_false;
+ loc_t rootloc = {
+ 0,
+ };
+ int ret = 0;
+ dict_t *xattr = NULL;
+ void *index_gfid = NULL;
+
+ *inode = NULL;
+ rootloc.inode = inode_ref(this->itable->root);
+ gf_uuid_copy(rootloc.gfid, rootloc.inode->gfid);
+
+ ret = syncop_getxattr(subvol, &rootloc, &xattr, GF_XATTROP_INDEX_GFID, NULL,
+ NULL);
+ if (ret < 0)
+ goto out;
+ if (!xattr) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = dict_get_ptr(xattr, GF_XATTROP_INDEX_GFID, &index_gfid);
+ if (ret)
+ goto out;
+
+ gf_msg_debug(this->name, 0, "index-dir gfid for %s: %s", subvol->name,
+ uuid_utoa(index_gfid));
+
+ ret = syncop_inode_find(this, subvol, index_gfid, inode, NULL, NULL);
- pthread_mutex_lock (&healer->mutex);
- {
- if (healer->rerun)
- goto unlock;
+out:
+ loc_wipe(&rootloc);
- healer->running = _gf_false;
- ret = _gf_true;
- }
-unlock:
- pthread_mutex_unlock (&healer->mutex);
+ if (xattr)
+ dict_unref(xattr);
- return ret;
+ return ret;
}
-
-inode_t *
-ec_shd_inode_find (xlator_t *this, xlator_t *subvol, uuid_t gfid)
+int
+ec_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name)
{
- inode_t *inode = NULL;
- int ret = 0;
- loc_t loc = {0, };
- struct iatt iatt = {0, };
-
- inode = inode_find (this->itable, gfid);
- if (inode) {
- inode_lookup (inode);
- goto out;
- }
+ loc_t loc = {
+ 0,
+ };
+ int ret = 0;
- loc.inode = inode_new (this->itable);
- if (!loc.inode)
- goto out;
- gf_uuid_copy (loc.gfid, gfid);
+ loc.parent = inode_ref(inode);
+ loc.name = name;
- ret = syncop_lookup (subvol, &loc, &iatt, NULL, NULL, NULL);
- if (ret < 0)
- goto out;
+ ret = syncop_unlink(subvol, &loc, NULL, NULL);
- inode = inode_link (loc.inode, NULL, NULL, &iatt);
- if (inode)
- inode_lookup (inode);
-out:
- loc_wipe (&loc);
- return inode;
+ loc_wipe(&loc);
+ return ret;
}
+static gf_boolean_t
+ec_is_heal_completed(char *status)
+{
+ char *bad_pos = NULL;
+ char *zero_pos = NULL;
+
+ if (!status) {
+ return _gf_false;
+ }
+
+ /*Logic:
+ * Status will be of the form Good: <binary>, Bad: <binary>
+ * If heal completes, if we do strchr for '0' it should be present after
+ * 'Bad:' i.e. strRchr for ':'
+ * */
+
+ zero_pos = strchr(status, '0');
+ bad_pos = strrchr(status, ':');
+ if (!zero_pos || !bad_pos) {
+ /*malformed status*/
+ return _gf_false;
+ }
+
+ if (zero_pos > bad_pos) {
+ return _gf_true;
+ }
+
+ return _gf_false;
+}
-inode_t*
-ec_shd_index_inode (xlator_t *this, xlator_t *subvol)
+int
+ec_shd_selfheal(struct subvol_healer *healer, int child, loc_t *loc,
+ gf_boolean_t full)
{
- loc_t rootloc = {0, };
- inode_t *inode = NULL;
- int ret = 0;
- dict_t *xattr = NULL;
- void *index_gfid = NULL;
-
- rootloc.inode = inode_ref (this->itable->root);
- gf_uuid_copy (rootloc.gfid, rootloc.inode->gfid);
-
- ret = syncop_getxattr (subvol, &rootloc, &xattr,
- GF_XATTROP_INDEX_GFID, NULL, NULL);
- if (ret || !xattr) {
- errno = -ret;
- goto out;
+ dict_t *xdata = NULL;
+ dict_t *dict = NULL;
+ uint32_t count;
+ int32_t ret;
+ char *heal_status = NULL;
+ ec_t *ec = healer->this->private;
+
+ GF_ATOMIC_INC(ec->stats.shd.attempted);
+ ret = syncop_getxattr(healer->this, loc, &dict, EC_XATTR_HEAL, NULL,
+ &xdata);
+ if (ret == 0) {
+ if (dict && (dict_get_str(dict, EC_XATTR_HEAL, &heal_status) == 0)) {
+ if (ec_is_heal_completed(heal_status)) {
+ GF_ATOMIC_INC(ec->stats.shd.completed);
+ }
}
+ }
+
+ if (!full && (loc->inode->ia_type == IA_IFDIR)) {
+ /* If we have just healed a directory, it's possible that
+ * other index entries have appeared to be healed. */
+ if ((xdata != NULL) &&
+ (dict_get_uint32(xdata, EC_XATTR_HEAL_NEW, &count) == 0) &&
+ (count > 0)) {
+ /* Force a rerun of the index healer. */
+ gf_msg_debug(healer->this->name, 0, "%d more entries to heal",
+ count);
+
+ healer->rerun = _gf_true;
+ }
+ }
- ret = dict_get_ptr (xattr, GF_XATTROP_INDEX_GFID, &index_gfid);
- if (ret)
- goto out;
-
- gf_log (this->name, GF_LOG_DEBUG, "index-dir gfid for %s: %s",
- subvol->name, uuid_utoa (index_gfid));
-
- inode = ec_shd_inode_find (this, subvol, index_gfid);
-
-out:
- loc_wipe (&rootloc);
+ if (xdata != NULL) {
+ dict_unref(xdata);
+ }
- if (xattr)
- dict_unref (xattr);
+ if (dict) {
+ dict_unref(dict);
+ }
- return inode;
+ return ret;
}
int
-ec_shd_index_purge (xlator_t *subvol, inode_t *inode, char *name)
+ec_shd_index_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
+ void *data)
{
- loc_t loc = {0, };
- int ret = 0;
-
- loc.parent = inode_ref (inode);
- loc.name = name;
+ struct subvol_healer *healer = data;
+ ec_t *ec = NULL;
+ loc_t loc = {0};
+ int ret = 0;
+
+ ec = healer->this->private;
+ if (ec->xl_up_count <= ec->fragments) {
+ return -ENOTCONN;
+ }
+ if (!ec->shd.enabled)
+ return -EBUSY;
+
+ gf_msg_debug(healer->this->name, 0, "got entry: %s", entry->d_name);
+
+ ret = gf_uuid_parse(entry->d_name, loc.gfid);
+ if (ret)
+ return 0;
- ret = syncop_unlink (subvol, &loc, NULL, NULL);
+ /* If this fails with ENOENT/ESTALE index is stale */
+ ret = syncop_gfid_to_path(healer->this->itable, subvol, loc.gfid,
+ (char **)&loc.path);
+ if (ret < 0)
+ goto out;
- loc_wipe (&loc);
- return ret;
-}
+ ret = syncop_inode_find(healer->this, healer->this, loc.gfid, &loc.inode,
+ NULL, NULL);
+ if (ret < 0)
+ goto out;
-int
-ec_shd_selfheal (struct subvol_healer *healer, int child, loc_t *loc)
-{
- return syncop_getxattr (healer->this, loc, NULL, EC_XATTR_HEAL, NULL,
- NULL);
+ ec_shd_selfheal(healer, healer->subvol, &loc, _gf_false);
+out:
+ if (ret == -ENOENT || ret == -ESTALE) {
+ gf_msg(healer->this->name, GF_LOG_DEBUG, 0, EC_MSG_HEAL_FAIL,
+ "Purging index for gfid %s:", uuid_utoa(loc.gfid));
+ ec_shd_index_purge(subvol, parent->inode, entry->d_name);
+ }
+ loc_wipe(&loc);
+
+ return 0;
}
-
int
-ec_shd_index_heal (xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
- void *data)
+ec_shd_index_sweep(struct subvol_healer *healer)
{
- struct subvol_healer *healer = data;
- ec_t *ec = NULL;
- loc_t loc = {0};
- int ret = 0;
-
- ec = healer->this->private;
- if (!ec->shd.enabled)
- return -EBUSY;
-
- gf_log (healer->this->name, GF_LOG_DEBUG, "got entry: %s",
- entry->d_name);
-
- ret = gf_uuid_parse (entry->d_name, loc.gfid);
- if (ret)
- return 0;
-
- /* If this fails with ENOENT/ESTALE index is stale */
- ret = syncop_gfid_to_path (healer->this->itable, subvol, loc.gfid,
- (char **)&loc.path);
- if (ret == -ENOENT || ret == -ESTALE) {
- ec_shd_index_purge (subvol, parent->inode, entry->d_name);
- goto out;
- }
-
- loc.inode = ec_shd_inode_find (healer->this, healer->this, loc.gfid);
- if (!loc.inode)
- goto out;
-
- ec_shd_selfheal (healer, healer->subvol, &loc);
-
+ loc_t loc = {0};
+ ec_t *ec = NULL;
+ int ret = 0;
+ xlator_t *subvol = NULL;
+ dict_t *xdata = NULL;
+
+ ec = healer->this->private;
+ subvol = ec->xl_list[healer->subvol];
+
+ ret = ec_shd_index_inode(healer->this, subvol, &loc.inode);
+ if (ret < 0) {
+ gf_msg(healer->this->name, GF_LOG_WARNING, errno,
+ EC_MSG_INDEX_DIR_GET_FAIL, "unable to get index-dir on %s",
+ subvol->name);
+ goto out;
+ }
+
+ xdata = dict_new();
+ if (!xdata || dict_set_int32(xdata, "get-gfid-type", 1)) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ _mask_cancellation();
+ ret = syncop_mt_dir_scan(NULL, subvol, &loc, GF_CLIENT_PID_SELF_HEALD,
+ healer, ec_shd_index_heal, xdata,
+ ec->shd.max_threads, ec->shd.wait_qlength);
+ _unmask_cancellation();
out:
- loc_wipe (&loc);
+ if (xdata)
+ dict_unref(xdata);
+ loc_wipe(&loc);
- return 0;
+ return ret;
}
int
-ec_shd_index_sweep (struct subvol_healer *healer)
+ec_shd_full_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
+ void *data)
{
- loc_t loc = {0};
- ec_t *ec = NULL;
- int ret = 0;
- xlator_t *subvol = NULL;
-
- ec = healer->this->private;
- subvol = ec->xl_list[healer->subvol];
-
- loc.inode = ec_shd_index_inode (healer->this, subvol);
- if (!loc.inode) {
- gf_log (healer->this->name, GF_LOG_WARNING,
- "unable to get index-dir on %s", subvol->name);
- return -errno;
- }
-
- ret = syncop_dir_scan (subvol, &loc, GF_CLIENT_PID_AFR_SELF_HEALD,
- healer, ec_shd_index_heal);
+ struct subvol_healer *healer = data;
+ xlator_t *this = healer->this;
+ ec_t *ec = NULL;
+ loc_t loc = {0};
+ int ret = 0;
+
+ ec = this->private;
+
+ if (this->cleanup_starting) {
+ return -ENOTCONN;
+ }
+
+ if (ec->xl_up_count <= ec->fragments) {
+ return -ENOTCONN;
+ }
+ if (!ec->shd.enabled)
+ return -EBUSY;
+
+ if (gf_uuid_is_null(entry->d_stat.ia_gfid)) {
+ /* It's possible that an entry has been removed just after
+ * being seen in a directory but before getting its stat info.
+ * In this case we'll receive a NULL gfid here. Since the file
+ * doesn't exist anymore, we can safely ignore it. */
+ return 0;
+ }
- inode_forget (loc.inode, 1);
- loc_wipe (&loc);
+ loc.parent = inode_ref(parent->inode);
+ loc.name = entry->d_name;
+ gf_uuid_copy(loc.gfid, entry->d_stat.ia_gfid);
- return ret;
-}
+ /* If this fails with ENOENT/ESTALE index is stale */
+ ret = syncop_gfid_to_path(this->itable, subvol, loc.gfid,
+ (char **)&loc.path);
+ if (ret < 0)
+ goto out;
-int
-ec_shd_full_heal (xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
- void *data)
-{
- struct subvol_healer *healer = data;
- xlator_t *this = healer->this;
- ec_t *ec = NULL;
- loc_t loc = {0};
- int ret = 0;
-
- ec = this->private;
- if (!ec->shd.enabled)
- return -EBUSY;
-
- loc.parent = inode_ref (parent->inode);
- loc.name = entry->d_name;
- gf_uuid_copy (loc.gfid, entry->d_stat.ia_gfid);
-
- /* If this fails with ENOENT/ESTALE index is stale */
- ret = syncop_gfid_to_path (this->itable, subvol, loc.gfid,
- (char **)&loc.path);
- if (ret < 0)
- goto out;
-
- loc.inode = ec_shd_inode_find (this, this, loc.gfid);
- if (!loc.inode) {
- ret = -EINVAL;
- goto out;
- }
+ ret = syncop_inode_find(this, this, loc.gfid, &loc.inode, NULL, NULL);
+ if (ret < 0)
+ goto out;
- ec_shd_selfheal (healer, healer->subvol, &loc);
+ ec_shd_selfheal(healer, healer->subvol, &loc, _gf_true);
- loc_wipe (&loc);
- ret = 0;
+ ret = 0;
out:
- return ret;
+ loc_wipe(&loc);
+ return ret;
}
int
-ec_shd_full_sweep (struct subvol_healer *healer, inode_t *inode)
+ec_shd_full_sweep(struct subvol_healer *healer, inode_t *inode)
{
- ec_t *ec = NULL;
- loc_t loc = {0};
-
- ec = healer->this->private;
- loc.inode = inode;
- return syncop_ftw (ec->xl_list[healer->subvol], &loc,
- GF_CLIENT_PID_AFR_SELF_HEALD, healer,
- ec_shd_full_heal);
+ ec_t *ec = NULL;
+ loc_t loc = {0};
+ int ret = -1;
+
+ ec = healer->this->private;
+ loc.inode = inode;
+ _mask_cancellation();
+ ret = syncop_ftw(ec->xl_list[healer->subvol], &loc,
+ GF_CLIENT_PID_SELF_HEALD, healer, ec_shd_full_heal);
+ _unmask_cancellation();
+ return ret;
}
-
void *
-ec_shd_index_healer (void *data)
+ec_shd_index_healer(void *data)
{
- struct subvol_healer *healer = NULL;
- xlator_t *this = NULL;
-
- healer = data;
- THIS = this = healer->this;
-
- for (;;) {
- ec_shd_healer_wait (healer);
-
- ASSERT_LOCAL(this, healer);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "starting index sweep on subvol %s",
- ec_subvol_name (this, healer->subvol));
-
- ec_shd_index_sweep (healer);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "finished index sweep on subvol %s",
- ec_subvol_name (this, healer->subvol));
+ struct subvol_healer *healer = NULL;
+ xlator_t *this = NULL;
+ int run = 0;
+
+ healer = data;
+ THIS = this = healer->this;
+ ec_t *ec = this->private;
+
+ for (;;) {
+ run = ec_shd_healer_wait(healer);
+ if (run == -1)
+ break;
+
+ if (ec->xl_up_count > ec->fragments) {
+ gf_msg_debug(this->name, 0, "starting index sweep on subvol %s",
+ ec_subvol_name(this, healer->subvol));
+ ec_shd_index_sweep(healer);
}
+ gf_msg_debug(this->name, 0, "finished index sweep on subvol %s",
+ ec_subvol_name(this, healer->subvol));
+ }
- return NULL;
+ return NULL;
}
-
void *
-ec_shd_full_healer (void *data)
+ec_shd_full_healer(void *data)
{
- struct subvol_healer *healer = NULL;
- xlator_t *this = NULL;
-
- int run = 0;
-
- healer = data;
- THIS = this = healer->this;
-
- for (;;) {
- pthread_mutex_lock (&healer->mutex);
- {
- run = __ec_shd_healer_wait (healer);
- if (!run)
- healer->running = _gf_false;
- }
- pthread_mutex_unlock (&healer->mutex);
-
- if (!run)
- break;
-
- ASSERT_LOCAL(this, healer);
-
- gf_log (this->name, GF_LOG_INFO,
- "starting full sweep on subvol %s",
- ec_subvol_name (this, healer->subvol));
+ struct subvol_healer *healer = NULL;
+ xlator_t *this = NULL;
+ loc_t rootloc = {0};
+
+ int run = 0;
+
+ healer = data;
+ THIS = this = healer->this;
+ ec_t *ec = this->private;
+
+ rootloc.inode = this->itable->root;
+ for (;;) {
+ run = ec_shd_healer_wait(healer);
+ if (run < 0) {
+ break;
+ } else if (run == 0) {
+ continue;
+ }
- ec_shd_full_sweep (healer, this->itable->root);
+ if (ec->xl_up_count > ec->fragments) {
+ gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_FULL_SWEEP_START,
+ "starting full sweep on subvol %s",
+ ec_subvol_name(this, healer->subvol));
- gf_log (this->name, GF_LOG_INFO,
- "finished full sweep on subvol %s",
- ec_subvol_name (this, healer->subvol));
+ ec_shd_selfheal(healer, healer->subvol, &rootloc, _gf_true);
+ ec_shd_full_sweep(healer, this->itable->root);
}
- return NULL;
-}
+ gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_FULL_SWEEP_STOP,
+ "finished full sweep on subvol %s",
+ ec_subvol_name(this, healer->subvol));
+ }
+ return NULL;
+}
int
-ec_shd_healer_init (xlator_t *this, struct subvol_healer *healer)
+ec_shd_healer_init(xlator_t *this, struct subvol_healer *healer)
{
- int ret = 0;
+ int ret = 0;
- ret = pthread_mutex_init (&healer->mutex, NULL);
- if (ret)
- goto out;
+ ret = pthread_mutex_init(&healer->mutex, NULL);
+ if (ret)
+ goto out;
- ret = pthread_cond_init (&healer->cond, NULL);
- if (ret)
- goto out;
+ ret = pthread_cond_init(&healer->cond, NULL);
+ if (ret)
+ goto out;
- healer->this = this;
- healer->running = _gf_false;
- healer->rerun = _gf_false;
- healer->local = _gf_false;
+ healer->this = this;
+ healer->running = _gf_false;
+ healer->rerun = _gf_false;
out:
- return ret;
+ return ret;
}
-
int
-ec_shd_healer_spawn (xlator_t *this, struct subvol_healer *healer,
- void *(threadfn)(void *))
+ec_shd_healer_spawn(xlator_t *this, struct subvol_healer *healer,
+ void *(threadfn)(void *))
{
- int ret = 0;
-
- pthread_mutex_lock (&healer->mutex);
- {
- if (healer->running) {
- pthread_cond_signal (&healer->cond);
- } else {
- ret = gf_thread_create (&healer->thread, NULL,
- threadfn, healer);
- if (ret)
- goto unlock;
- healer->running = 1;
- }
-
- healer->rerun = 1;
+ int ret = 0;
+
+ pthread_mutex_lock(&healer->mutex);
+ {
+ if (healer->running) {
+ pthread_cond_signal(&healer->cond);
+ } else {
+ ret = gf_thread_create(&healer->thread, NULL, threadfn, healer,
+ "ecshd");
+ if (ret)
+ goto unlock;
+ healer->running = 1;
}
+
+ healer->rerun = 1;
+ }
unlock:
- pthread_mutex_unlock (&healer->mutex);
+ pthread_mutex_unlock(&healer->mutex);
- return ret;
+ return ret;
}
int
-ec_shd_full_healer_spawn (xlator_t *this, int subvol)
+ec_shd_full_healer_spawn(xlator_t *this, int subvol)
{
- return ec_shd_healer_spawn (this, NTH_FULL_HEALER (this, subvol),
- ec_shd_full_healer);
-}
+ if (xlator_is_cleanup_starting(this))
+ return -1;
+ return ec_shd_healer_spawn(this, NTH_FULL_HEALER(this, subvol),
+ ec_shd_full_healer);
+}
int
-ec_shd_index_healer_spawn (xlator_t *this, int subvol)
+ec_shd_index_healer_spawn(xlator_t *this, int subvol)
{
- return ec_shd_healer_spawn (this, NTH_INDEX_HEALER (this, subvol),
- ec_shd_index_healer);
+ if (xlator_is_cleanup_starting(this))
+ return -1;
+
+ return ec_shd_healer_spawn(this, NTH_INDEX_HEALER(this, subvol),
+ ec_shd_index_healer);
}
void
-ec_selfheal_childup (ec_t *ec, int child)
+ec_shd_index_healer_wake(ec_t *ec)
{
- if (!ec->shd.iamshd)
- return;
- ec_shd_index_healer_spawn (ec->xl, child);
+ int32_t i;
+
+ for (i = 0; i < ec->nodes; i++) {
+ if (((ec->xl_up >> i) & 1) != 0) {
+ ec_shd_index_healer_spawn(ec->xl, i);
+ }
+ }
}
int
-ec_selfheal_daemon_init (xlator_t *this)
+ec_selfheal_daemon_init(xlator_t *this)
{
- ec_t *ec = NULL;
- ec_self_heald_t *shd = NULL;
- int ret = -1;
- int i = 0;
-
- ec = this->private;
- shd = &ec->shd;
-
- this->itable = inode_table_new (SHD_INODE_LRU_LIMIT, this);
- if (!this->itable)
- goto out;
-
- shd->index_healers = GF_CALLOC (sizeof(*shd->index_healers),
- ec->nodes,
- ec_mt_subvol_healer_t);
- if (!shd->index_healers)
- goto out;
-
- for (i = 0; i < ec->nodes; i++) {
- shd->index_healers[i].subvol = i;
- ret = ec_shd_healer_init (this, &shd->index_healers[i]);
- if (ret)
- goto out;
- }
+ ec_t *ec = NULL;
+ ec_self_heald_t *shd = NULL;
+ int ret = -1;
+ int i = 0;
+
+ ec = this->private;
+ shd = &ec->shd;
+
+ shd->index_healers = GF_CALLOC(sizeof(*shd->index_healers), ec->nodes,
+ ec_mt_subvol_healer_t);
+ if (!shd->index_healers)
+ goto out;
+
+ for (i = 0; i < ec->nodes; i++) {
+ shd->index_healers[i].subvol = i;
+ ret = ec_shd_healer_init(this, &shd->index_healers[i]);
+ if (ret)
+ goto out;
+ }
- shd->full_healers = GF_CALLOC (sizeof(*shd->full_healers),
- ec->nodes,
- ec_mt_subvol_healer_t);
- if (!shd->full_healers)
- goto out;
-
- for (i = 0; i < ec->nodes; i++) {
- shd->full_healers[i].subvol = i;
- ret = ec_shd_healer_init (this, &shd->full_healers[i]);
- if (ret)
- goto out;
- }
+ shd->full_healers = GF_CALLOC(sizeof(*shd->full_healers), ec->nodes,
+ ec_mt_subvol_healer_t);
+ if (!shd->full_healers)
+ goto out;
- ret = 0;
+ for (i = 0; i < ec->nodes; i++) {
+ shd->full_healers[i].subvol = i;
+ ret = ec_shd_healer_init(this, &shd->full_healers[i]);
+ if (ret)
+ goto out;
+ }
+
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
int
-ec_heal_op (xlator_t *this, dict_t *output, gf_xl_afr_op_t op, int xl_id)
+ec_heal_op(xlator_t *this, dict_t *output, gf_xl_afr_op_t op, int xl_id)
{
- char key[64] = {0};
- int op_ret = 0;
- ec_t *ec = NULL;
- int i = 0;
- GF_UNUSED int ret = 0;
-
- ec = this->private;
-
- for (i = 0; i < ec->nodes; i++) {
- snprintf (key, sizeof (key), "%d-%d-status", xl_id, i);
-
- op_ret = -1;
- if (((ec->xl_up >> i) & 1) == 0) {
- ret = dict_set_str (output, key, "Brick is not connected");
- } else if (!ec->up) {
- ret = dict_set_str (output, key,
- "Disperse subvolume is not up");
- } else if (!ec_shd_is_subvol_local (this, i)) {
- ret = dict_set_str (output, key, "Brick is remote");
- } else {
- ret = dict_set_str (output, key, "Started self-heal");
- if (op == GF_SHD_OP_HEAL_FULL) {
- ec_shd_full_healer_spawn (this, i);
- } else if (op == GF_SHD_OP_HEAL_INDEX) {
- ec_shd_index_healer_spawn (this, i);
- }
- op_ret = 0;
- }
+ char key[64] = {0};
+ int op_ret = 0;
+ ec_t *ec = NULL;
+ int i = 0;
+ GF_UNUSED int ret = 0;
+
+ ec = this->private;
+
+ op_ret = -1;
+ for (i = 0; i < ec->nodes; i++) {
+ snprintf(key, sizeof(key), "%d-%d-status", xl_id, i);
+
+ if (((ec->xl_up >> i) & 1) == 0) {
+ ret = dict_set_str(output, key, "Brick is not connected");
+ } else if (!ec->up) {
+ ret = dict_set_str(output, key, "Disperse subvolume is not up");
+ } else if (!ec_shd_is_subvol_local(this, i)) {
+ ret = dict_set_str(output, key, "Brick is remote");
+ } else {
+ ret = dict_set_str(output, key, "Started self-heal");
+ if (op == GF_SHD_OP_HEAL_FULL) {
+ ec_shd_full_healer_spawn(this, i);
+ } else if (op == GF_SHD_OP_HEAL_INDEX) {
+ ec_shd_index_healer_spawn(this, i);
+ }
+ op_ret = 0;
}
- return op_ret;
+ }
+ return op_ret;
}
int
-ec_xl_op (xlator_t *this, dict_t *input, dict_t *output)
+ec_xl_op(xlator_t *this, dict_t *input, dict_t *output)
{
- gf_xl_afr_op_t op = GF_SHD_OP_INVALID;
- int ret = 0;
- int xl_id = 0;
+ gf_xl_afr_op_t op = GF_SHD_OP_INVALID;
+ int ret = 0;
+ int xl_id = 0;
- ret = dict_get_int32 (input, "xl-op", (int32_t *)&op);
- if (ret)
- goto out;
+ ret = dict_get_int32(input, "xl-op", (int32_t *)&op);
+ if (ret)
+ goto out;
- ret = dict_get_int32 (input, this->name, &xl_id);
- if (ret)
- goto out;
+ ret = dict_get_int32(input, this->name, &xl_id);
+ if (ret)
+ goto out;
- ret = dict_set_int32 (output, this->name, xl_id);
- if (ret)
- goto out;
+ ret = dict_set_int32(output, this->name, xl_id);
+ if (ret)
+ goto out;
- switch (op) {
+ switch (op) {
case GF_SHD_OP_HEAL_FULL:
- ret = ec_heal_op (this, output, op, xl_id);
- break;
+ ret = ec_heal_op(this, output, op, xl_id);
+ break;
case GF_SHD_OP_HEAL_INDEX:
- ret = ec_heal_op (this, output, op, xl_id);
- break;
+ ret = ec_heal_op(this, output, op, xl_id);
+ break;
default:
- ret = -1;
- break;
- }
+ ret = -1;
+ break;
+ }
out:
- dict_del (output, this->name);
- return ret;
+ dict_del(output, this->name);
+ return ret;
+}
+
+void
+ec_destroy_healer_object(xlator_t *this, struct subvol_healer *healer)
+{
+ if (!healer)
+ return;
+
+ pthread_cond_destroy(&healer->cond);
+ pthread_mutex_destroy(&healer->mutex);
+}
+
+void
+ec_selfheal_daemon_fini(xlator_t *this)
+{
+ struct subvol_healer *healer = NULL;
+ ec_self_heald_t *shd = NULL;
+ ec_t *priv = NULL;
+ int i = 0;
+
+ priv = this->private;
+ if (!priv)
+ return;
+
+ shd = &priv->shd;
+ if (!shd->iamshd)
+ return;
+
+ for (i = 0; i < priv->nodes; i++) {
+ healer = &shd->index_healers[i];
+ ec_destroy_healer_object(this, healer);
+
+ healer = &shd->full_healers[i];
+ ec_destroy_healer_object(this, healer);
+ }
+
+ GF_FREE(shd->index_healers);
+ GF_FREE(shd->full_healers);
}
diff --git a/xlators/cluster/ec/src/ec-heald.h b/xlators/cluster/ec/src/ec-heald.h
index 0f27a8ec776..6c7da4edc10 100644
--- a/xlators/cluster/ec/src/ec-heald.h
+++ b/xlators/cluster/ec/src/ec-heald.h
@@ -11,37 +11,20 @@
#ifndef __EC_HEALD_H__
#define __EC_HEALD_H__
-#include "xlator.h"
-
-struct _ec;
-typedef struct _ec ec_t;
-
-struct subvol_healer {
- xlator_t *this;
- int subvol;
- gf_boolean_t local;
- gf_boolean_t running;
- gf_boolean_t rerun;
- pthread_mutex_t mutex;
- pthread_cond_t cond;
- pthread_t thread;
-};
-
-struct _ec_self_heald;
-typedef struct _ec_self_heald ec_self_heald_t;
-
-struct _ec_self_heald {
- gf_boolean_t iamshd;
- gf_boolean_t enabled;
- int timeout;
- struct subvol_healer *index_healers;
- struct subvol_healer *full_healers;
-};
+#include "ec-types.h" // for ec_t
+#include "glusterfs/dict.h" // for dict_t
+#include "glusterfs/globals.h" // for xlator_t
int
-ec_xl_op (xlator_t *this, dict_t *input, dict_t *output);
+ec_xl_op(xlator_t *this, dict_t *input, dict_t *output);
int
-ec_selfheal_daemon_init (xlator_t *this);
-void ec_selfheal_childup (ec_t *ec, int child);
+ec_selfheal_daemon_init(xlator_t *this);
+
+void
+ec_shd_index_healer_wake(ec_t *ec);
+
+void
+ec_selfheal_daemon_fini(xlator_t *this);
+
#endif /* __EC_HEALD_H__ */
diff --git a/xlators/cluster/ec/src/ec-helpers.c b/xlators/cluster/ec/src/ec-helpers.c
index 65deba63959..48f54475e01 100644
--- a/xlators/cluster/ec/src/ec-helpers.c
+++ b/xlators/cluster/ec/src/ec-helpers.c
@@ -10,35 +10,29 @@
#include <libgen.h>
-#include "byte-order.h"
+#include <glusterfs/byte-order.h>
+#include "ec.h"
#include "ec-mem-types.h"
+#include "ec-messages.h"
#include "ec-fops.h"
+#include "ec-method.h"
#include "ec-helpers.h"
-#ifndef ffsll
-#define ffsll(x) __builtin_ffsll(x)
-#endif
+static const char *ec_fop_list[] = {[-EC_FOP_HEAL] = "HEAL"};
-static const char * ec_fop_list[] =
-{
- [-EC_FOP_HEAL] = "HEAL"
-};
-
-const char * ec_bin(char * str, size_t size, uint64_t value, int32_t digits)
+const char *
+ec_bin(char *str, size_t size, uint64_t value, int32_t digits)
{
str += size;
- if (size-- < 1)
- {
+ if (size-- < 1) {
goto failed;
}
*--str = 0;
- while ((value != 0) || (digits > 0))
- {
- if (size-- < 1)
- {
+ while ((value != 0) || (digits > 0)) {
+ if (size-- < 1) {
goto failed;
}
*--str = '0' + (value & 1);
@@ -52,21 +46,22 @@ failed:
return "<buffer too small>";
}
-const char * ec_fop_name(int32_t id)
+const char *
+ec_fop_name(int32_t id)
{
- if (id >= 0)
- {
+ if (id >= 0) {
return gf_fop_list[id];
}
return ec_fop_list[-id];
}
-void ec_trace(const char * event, ec_fop_data_t * fop, const char * fmt, ...)
+void
+ec_trace(const char *event, ec_fop_data_t *fop, const char *fmt, ...)
{
char str1[32], str2[32], str3[32];
- char * msg;
- ec_t * ec = fop->xl->private;
+ char *msg;
+ ec_t *ec = fop->xl->private;
va_list args;
int32_t ret;
@@ -74,45 +69,28 @@ void ec_trace(const char * event, ec_fop_data_t * fop, const char * fmt, ...)
ret = vasprintf(&msg, fmt, args);
va_end(args);
- if (ret < 0)
- {
+ if (ret < 0) {
msg = "<memory allocation error>";
}
- gf_log("ec", GF_LOG_TRACE, "%s(%s) %p(%p) [refs=%d, winds=%d, jobs=%d] "
- "frame=%p/%p, min/exp=%d/%d, err=%d state=%d "
- "{%s:%s:%s} %s",
- event, ec_fop_name(fop->id), fop, fop->parent, fop->refs,
- fop->winds, fop->jobs, fop->req_frame, fop->frame, fop->minimum,
- fop->expected, fop->error, fop->state,
- ec_bin(str1, sizeof(str1), fop->mask, ec->nodes),
- ec_bin(str2, sizeof(str2), fop->remaining, ec->nodes),
- ec_bin(str3, sizeof(str3), fop->bad, ec->nodes), msg);
+ gf_msg_trace("ec", 0,
+ "%s(%s) %p(%p) [refs=%d, winds=%d, jobs=%d] "
+ "frame=%p/%p, min/exp=%d/%d, err=%d state=%d "
+ "{%s:%s:%s} %s",
+ event, ec_fop_name(fop->id), fop, fop->parent, fop->refs,
+ fop->winds, fop->jobs, fop->req_frame, fop->frame,
+ fop->minimum, fop->expected, fop->error, fop->state,
+ ec_bin(str1, sizeof(str1), fop->mask, ec->nodes),
+ ec_bin(str2, sizeof(str2), fop->remaining, ec->nodes),
+ ec_bin(str3, sizeof(str3), fop->good, ec->nodes), msg);
- if (ret >= 0)
- {
+ if (ret >= 0) {
free(msg);
}
}
-int32_t ec_bits_count(uint64_t n)
-{
- n -= (n >> 1) & 0x5555555555555555ULL;
- n = ((n >> 2) & 0x3333333333333333ULL) + (n & 0x3333333333333333ULL);
- n = (n + (n >> 4)) & 0x0F0F0F0F0F0F0F0FULL;
- n += n >> 8;
- n += n >> 16;
- n += n >> 32;
-
- return n & 0xFF;
-}
-
-int32_t ec_bits_index(uint64_t n)
-{
- return ffsll(n) - 1;
-}
-
-int32_t ec_bits_consume(uint64_t * n)
+int32_t
+ec_bits_consume(uint64_t *n)
{
uint64_t tmp;
@@ -120,24 +98,21 @@ int32_t ec_bits_consume(uint64_t * n)
tmp &= -tmp;
*n ^= tmp;
- return ffsll(tmp) - 1;
+ return gf_bits_index(tmp);
}
-size_t ec_iov_copy_to(void * dst, struct iovec * vector, int32_t count,
- off_t offset, size_t size)
+size_t
+ec_iov_copy_to(void *dst, struct iovec *vector, int32_t count, off_t offset,
+ size_t size)
{
int32_t i = 0;
size_t total = 0, len = 0;
- while (i < count)
- {
- if (offset < vector[i].iov_len)
- {
- while ((i < count) && (size > 0))
- {
+ while (i < count) {
+ if (offset < vector[i].iov_len) {
+ while ((i < count) && (size > 0)) {
len = size;
- if (len > vector[i].iov_len - offset)
- {
+ if (len > vector[i].iov_len - offset) {
len = vector[i].iov_len - offset;
}
memcpy(dst, vector[i++].iov_base + offset, len);
@@ -157,30 +132,161 @@ size_t ec_iov_copy_to(void * dst, struct iovec * vector, int32_t count,
return total;
}
-int32_t ec_dict_set_number(dict_t * dict, char * key, uint64_t value)
+int32_t
+ec_buffer_alloc(xlator_t *xl, size_t size, struct iobref **piobref, void **ptr)
{
- uint64_t * ptr;
+ struct iobref *iobref = NULL;
+ struct iobuf *iobuf = NULL;
+ int32_t ret = -ENOMEM;
+
+ iobuf = iobuf_get_page_aligned(xl->ctx->iobuf_pool, size,
+ EC_METHOD_WORD_SIZE);
+ if (iobuf == NULL) {
+ goto out;
+ }
+
+ iobref = *piobref;
+ if (iobref == NULL) {
+ iobref = iobref_new();
+ if (iobref == NULL) {
+ goto out;
+ }
+ }
+
+ ret = iobref_add(iobref, iobuf);
+ if (ret != 0) {
+ if (iobref != *piobref) {
+ iobref_unref(iobref);
+ }
+ iobref = NULL;
+
+ goto out;
+ }
+
+ GF_ASSERT(EC_ALIGN_CHECK(iobuf->ptr, EC_METHOD_WORD_SIZE));
+
+ *ptr = iobuf->ptr;
+
+out:
+ if (iobuf != NULL) {
+ iobuf_unref(iobuf);
+ }
+
+ if (iobref != NULL) {
+ *piobref = iobref;
+ }
+
+ return ret;
+}
+
+int32_t
+ec_dict_set_array(dict_t *dict, char *key, uint64_t value[], int32_t size)
+{
+ int ret = -1;
+ uint64_t *ptr = NULL;
+ int32_t vindex;
+
+ if (value == NULL) {
+ return -EINVAL;
+ }
+
+ ptr = GF_MALLOC(sizeof(uint64_t) * size, gf_common_mt_char);
+ if (ptr == NULL) {
+ return -ENOMEM;
+ }
+ for (vindex = 0; vindex < size; vindex++) {
+ ptr[vindex] = hton64(value[vindex]);
+ }
+ ret = dict_set_bin(dict, key, ptr, sizeof(uint64_t) * size);
+ if (ret)
+ GF_FREE(ptr);
+ return ret;
+}
+
+int32_t
+ec_dict_get_array(dict_t *dict, char *key, uint64_t value[], int32_t size)
+{
+ void *ptr;
+ int32_t len;
+ int32_t vindex;
+ int32_t old_size = 0;
+ int32_t err;
+
+ if (dict == NULL) {
+ return -EINVAL;
+ }
+ err = dict_get_ptr_and_len(dict, key, &ptr, &len);
+ if (err != 0) {
+ return err;
+ }
+
+ if (len > (size * sizeof(uint64_t)) || (len % sizeof(uint64_t))) {
+ return -EINVAL;
+ }
+
+ /* 3.6 version ec would have stored version in 64 bit. In that case treat
+ * metadata versions same as data*/
+ old_size = min(size, len / sizeof(uint64_t));
+ for (vindex = 0; vindex < old_size; vindex++) {
+ value[vindex] = ntoh64(*((uint64_t *)ptr + vindex));
+ }
+
+ if (old_size < size) {
+ for (vindex = old_size; vindex < size; vindex++) {
+ value[vindex] = value[old_size - 1];
+ }
+ }
+
+ return 0;
+}
+
+int32_t
+ec_dict_del_array(dict_t *dict, char *key, uint64_t value[], int32_t size)
+{
+ int ret = 0;
+
+ ret = ec_dict_get_array(dict, key, value, size);
+ if (ret == 0)
+ dict_del(dict, key);
+
+ return ret;
+}
+
+int32_t
+ec_dict_set_number(dict_t *dict, char *key, uint64_t value)
+{
+ int ret = -1;
+ uint64_t *ptr;
ptr = GF_MALLOC(sizeof(value), gf_common_mt_char);
- if (ptr == NULL)
- {
- return -1;
+ if (ptr == NULL) {
+ return -ENOMEM;
}
*ptr = hton64(value);
- return dict_set_bin(dict, key, ptr, sizeof(value));
+ ret = dict_set_bin(dict, key, ptr, sizeof(value));
+ if (ret)
+ GF_FREE(ptr);
+
+ return ret;
}
-int32_t ec_dict_del_number(dict_t * dict, char * key, uint64_t * value)
+int32_t
+ec_dict_del_number(dict_t *dict, char *key, uint64_t *value)
{
- void * ptr;
- int32_t len;
+ void *ptr;
+ int32_t len, err;
- if ((dict == NULL) || (dict_get_ptr_and_len(dict, key, &ptr, &len) != 0) ||
- (len != sizeof(uint64_t)))
- {
- return -1;
+ if (dict == NULL) {
+ return -EINVAL;
+ }
+ err = dict_get_ptr_and_len(dict, key, &ptr, &len);
+ if (err != 0) {
+ return err;
+ }
+ if (len != sizeof(uint64_t)) {
+ return -EINVAL;
}
*value = ntoh64(*(uint64_t *)ptr);
@@ -190,22 +296,24 @@ int32_t ec_dict_del_number(dict_t * dict, char * key, uint64_t * value)
return 0;
}
-int32_t ec_dict_set_config(dict_t * dict, char * key, ec_config_t * config)
+int32_t
+ec_dict_set_config(dict_t *dict, char *key, ec_config_t *config)
{
- uint64_t * ptr, data;
+ int ret = -1;
+ uint64_t *ptr, data;
- if (config->version > EC_CONFIG_VERSION)
- {
- gf_log("ec", GF_LOG_ERROR, "Trying to store an unsupported config "
- "version (%u)", config->version);
+ if (config->version > EC_CONFIG_VERSION) {
+ gf_msg("ec", GF_LOG_ERROR, EINVAL, EC_MSG_UNSUPPORTED_VERSION,
+ "Trying to store an unsupported config "
+ "version (%u)",
+ config->version);
- return -1;
+ return -EINVAL;
}
ptr = GF_MALLOC(sizeof(uint64_t), gf_common_mt_char);
- if (ptr == NULL)
- {
- return -1;
+ if (ptr == NULL) {
+ return -ENOMEM;
}
data = ((uint64_t)config->version) << 56;
@@ -217,30 +325,51 @@ int32_t ec_dict_set_config(dict_t * dict, char * key, ec_config_t * config)
*ptr = hton64(data);
- return dict_set_bin(dict, key, ptr, sizeof(uint64_t));
+ ret = dict_set_bin(dict, key, ptr, sizeof(uint64_t));
+ if (ret)
+ GF_FREE(ptr);
+
+ return ret;
}
-int32_t ec_dict_del_config(dict_t * dict, char * key, ec_config_t * config)
+int32_t
+ec_dict_del_config(dict_t *dict, char *key, ec_config_t *config)
{
- void * ptr;
+ void *ptr;
uint64_t data;
- int32_t len;
+ int32_t len, err;
- if ((dict == NULL) || (dict_get_ptr_and_len(dict, key, &ptr, &len) != 0) ||
- (len != sizeof(uint64_t)))
- {
- return -1;
+ if (dict == NULL) {
+ return -EINVAL;
+ }
+ err = dict_get_ptr_and_len(dict, key, &ptr, &len);
+ if (err != 0) {
+ return err;
+ }
+ if (len != sizeof(uint64_t)) {
+ return -EINVAL;
}
data = ntoh64(*(uint64_t *)ptr);
+ /* Currently we need to get the config xattr for entries of type IA_INVAL.
+ * These entries can later become IA_DIR entries (after inode_link()),
+ * which don't have a config xattr. However, since the xattr is requested
+ * using an xattrop() fop, it will always return a config full of 0's
+ * instead of saying that it doesn't exist.
+ *
+ * We need to filter out this case and consider that a config xattr == 0 is
+ * the same as a non-existent xattr. Otherwise ec_config_check() will fail.
+ */
+ if (data == 0) {
+ return -ENODATA;
+ }
config->version = (data >> 56) & 0xff;
- if (config->version > EC_CONFIG_VERSION)
- {
- gf_log("ec", GF_LOG_ERROR, "Found an unsupported config version (%u)",
- config->version);
+ if (config->version > EC_CONFIG_VERSION) {
+ gf_msg("ec", GF_LOG_ERROR, EINVAL, EC_MSG_UNSUPPORTED_VERSION,
+ "Found an unsupported config version (%u)", config->version);
- return -1;
+ return -EINVAL;
}
config->algorithm = (data >> 48) & 0xff;
@@ -254,43 +383,43 @@ int32_t ec_dict_del_config(dict_t * dict, char * key, ec_config_t * config)
return 0;
}
-int32_t ec_loc_gfid_check(xlator_t * xl, uuid_t dst, uuid_t src)
+gf_boolean_t
+ec_loc_gfid_check(xlator_t *xl, uuid_t dst, uuid_t src)
{
- if (gf_uuid_is_null(src))
- {
- return 1;
+ if (gf_uuid_is_null(src)) {
+ return _gf_true;
}
- if (gf_uuid_is_null(dst))
- {
+ if (gf_uuid_is_null(dst)) {
gf_uuid_copy(dst, src);
- return 1;
+ return _gf_true;
}
- if (gf_uuid_compare(dst, src) != 0)
- {
- gf_log(xl->name, GF_LOG_WARNING, "Mismatching GFID's in loc");
+ if (gf_uuid_compare(dst, src) != 0) {
+ gf_msg(xl->name, GF_LOG_WARNING, 0, EC_MSG_GFID_MISMATCH,
+ "Mismatching GFID's in loc");
- return 0;
+ return _gf_false;
}
- return 1;
+ return _gf_true;
}
-int32_t ec_loc_setup_inode(xlator_t *xl, loc_t *loc)
+int32_t
+ec_loc_setup_inode(xlator_t *xl, inode_table_t *table, loc_t *loc)
{
- int32_t ret = -1;
+ int32_t ret = -EINVAL;
if (loc->inode != NULL) {
if (!ec_loc_gfid_check(xl, loc->gfid, loc->inode->gfid)) {
goto out;
}
- } else if (loc->parent != NULL) {
+ } else if (table != NULL) {
if (!gf_uuid_is_null(loc->gfid)) {
- loc->inode = inode_find(loc->parent->table, loc->gfid);
- } else if (loc->path && strchr (loc->path, '/')) {
- loc->inode = inode_resolve(loc->parent->table, (char *)loc->path);
+ loc->inode = inode_find(table, loc->gfid);
+ } else if (loc->path && strchr(loc->path, '/')) {
+ loc->inode = inode_resolve(table, (char *)loc->path);
}
}
@@ -300,49 +429,65 @@ out:
return ret;
}
-int32_t ec_loc_setup_parent(xlator_t *xl, loc_t *loc)
+int32_t
+ec_loc_setup_parent(xlator_t *xl, inode_table_t *table, loc_t *loc)
{
char *path, *parent;
- int32_t ret = -1;
+ int32_t ret = -EINVAL;
if (loc->parent != NULL) {
if (!ec_loc_gfid_check(xl, loc->pargfid, loc->parent->gfid)) {
goto out;
}
- } else if (loc->inode != NULL) {
+ } else if (table != NULL) {
if (!gf_uuid_is_null(loc->pargfid)) {
- loc->parent = inode_find(loc->inode->table, loc->pargfid);
- } else if (loc->path && strchr (loc->path, '/')) {
+ loc->parent = inode_find(table, loc->pargfid);
+ } else if (loc->path && strchr(loc->path, '/')) {
path = gf_strdup(loc->path);
if (path == NULL) {
- gf_log(xl->name, GF_LOG_ERROR, "Unable to duplicate path '%s'",
- loc->path);
+ gf_msg(xl->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY,
+ "Unable to duplicate path '%s'", loc->path);
+
+ ret = -ENOMEM;
goto out;
}
parent = dirname(path);
- loc->parent = inode_resolve(loc->inode->table, parent);
+ loc->parent = inode_resolve(table, parent);
+ if (loc->parent != NULL) {
+ gf_uuid_copy(loc->pargfid, loc->parent->gfid);
+ }
GF_FREE(path);
}
}
+ /* If 'pargfid' has not been determined, clear 'name' to avoid resolutions
+ based on <gfid:pargfid>/name. */
+ if (gf_uuid_is_null(loc->pargfid)) {
+ loc->name = NULL;
+ }
+
ret = 0;
out:
return ret;
}
-int32_t ec_loc_setup_path(xlator_t *xl, loc_t *loc)
+int32_t
+ec_loc_setup_path(xlator_t *xl, loc_t *loc)
{
- uuid_t root = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
+ static uuid_t root = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
char *name;
- int32_t ret = -1;
+ int32_t ret = -EINVAL;
if (loc->path != NULL) {
name = strrchr(loc->path, '/');
if (name == NULL) {
- ret = 0; /*<gfid:gfid-str>*/
- goto out;
+ /* Allow gfid paths: <gfid:...> */
+ if (strncmp(loc->path, "<gfid:", 6) == 0) {
+ ret = 0;
+ }
+ goto out;
}
if (name == loc->path) {
if (name[1] == 0) {
@@ -359,8 +504,8 @@ int32_t ec_loc_setup_path(xlator_t *xl, loc_t *loc)
if (loc->name != NULL) {
if (strcmp(loc->name, name) != 0) {
- gf_log(xl->name, GF_LOG_ERROR, "Invalid name '%s' in loc",
- loc->name);
+ gf_msg(xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_INVALID_LOC_NAME,
+ "Invalid name '%s' in loc", loc->name);
goto out;
}
@@ -375,45 +520,58 @@ out:
return ret;
}
-int32_t ec_loc_parent(xlator_t *xl, loc_t *loc, loc_t *parent)
+int32_t
+ec_loc_parent(xlator_t *xl, loc_t *loc, loc_t *parent)
{
+ inode_table_t *table = NULL;
char *str = NULL;
- int32_t ret = -1;
+ int32_t ret = -ENOMEM;
memset(parent, 0, sizeof(loc_t));
if (loc->parent != NULL) {
+ table = loc->parent->table;
parent->inode = inode_ref(loc->parent);
+ } else if (loc->inode != NULL) {
+ table = loc->inode->table;
}
if (!gf_uuid_is_null(loc->pargfid)) {
gf_uuid_copy(parent->gfid, loc->pargfid);
}
- if (loc->path && strchr (loc->path, '/')) {
+ if (loc->path && strchr(loc->path, '/')) {
str = gf_strdup(loc->path);
if (str == NULL) {
- gf_log(xl->name, GF_LOG_ERROR, "Unable to duplicate path '%s'",
- loc->path);
+ gf_msg(xl->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY,
+ "Unable to duplicate path '%s'", loc->path);
goto out;
}
parent->path = gf_strdup(dirname(str));
if (parent->path == NULL) {
- gf_log(xl->name, GF_LOG_ERROR, "Unable to duplicate path '%s'",
- dirname(str));
+ gf_msg(xl->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY,
+ "Unable to duplicate path '%s'", dirname(str));
goto out;
}
}
- if ((ec_loc_setup_path(xl, parent) != 0) ||
- (ec_loc_setup_inode(xl, parent) != 0) ||
- (ec_loc_setup_parent(xl, parent) != 0)) {
+ ret = ec_loc_setup_path(xl, parent);
+ if (ret == 0) {
+ ret = ec_loc_setup_inode(xl, table, parent);
+ }
+ if (ret == 0) {
+ ret = ec_loc_setup_parent(xl, table, parent);
+ }
+ if (ret != 0) {
goto out;
}
if ((parent->inode == NULL) && (parent->path == NULL) &&
gf_uuid_is_null(parent->gfid)) {
- gf_log(xl->name, GF_LOG_ERROR, "Parent inode missing for loc_t");
+ gf_msg(xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_LOC_PARENT_INODE_MISSING,
+ "Parent inode missing for loc_t");
+
+ ret = -EINVAL;
goto out;
}
@@ -423,25 +581,32 @@ int32_t ec_loc_parent(xlator_t *xl, loc_t *loc, loc_t *parent)
out:
GF_FREE(str);
- if (ret != 0)
- {
+ if (ret != 0) {
loc_wipe(parent);
}
return ret;
}
-int32_t ec_loc_update(xlator_t *xl, loc_t *loc, inode_t *inode,
- struct iatt *iatt)
+int32_t
+ec_loc_update(xlator_t *xl, loc_t *loc, inode_t *inode, struct iatt *iatt)
{
- int32_t ret = -1;
-
- if ((inode != NULL) && (loc->inode != inode)) {
- if (loc->inode != NULL) {
- inode_unref(loc->inode);
+ inode_table_t *table = NULL;
+ int32_t ret = -EINVAL;
+
+ if (inode != NULL) {
+ table = inode->table;
+ if (loc->inode != inode) {
+ if (loc->inode != NULL) {
+ inode_unref(loc->inode);
+ }
+ loc->inode = inode_ref(inode);
+ gf_uuid_copy(loc->gfid, inode->gfid);
}
- loc->inode = inode_ref(inode);
- gf_uuid_copy(loc->gfid, inode->gfid);
+ } else if (loc->inode != NULL) {
+ table = loc->inode->table;
+ } else if (loc->parent != NULL) {
+ table = loc->parent->table;
}
if (iatt != NULL) {
@@ -450,22 +615,26 @@ int32_t ec_loc_update(xlator_t *xl, loc_t *loc, inode_t *inode,
}
}
- if ((ec_loc_setup_path(xl, loc) != 0) ||
- (ec_loc_setup_inode(xl, loc) != 0) ||
- (ec_loc_setup_parent(xl, loc) != 0)) {
+ ret = ec_loc_setup_path(xl, loc);
+ if (ret == 0) {
+ ret = ec_loc_setup_inode(xl, table, loc);
+ }
+ if (ret == 0) {
+ ret = ec_loc_setup_parent(xl, table, loc);
+ }
+ if (ret != 0) {
goto out;
}
- ret = 0;
-
out:
return ret;
}
-int32_t ec_loc_from_fd(xlator_t * xl, loc_t * loc, fd_t * fd)
+int32_t
+ec_loc_from_fd(xlator_t *xl, loc_t *loc, fd_t *fd)
{
- ec_fd_t * ctx;
- int32_t ret = -1;
+ ec_fd_t *ctx;
+ int32_t ret = -ENOMEM;
memset(loc, 0, sizeof(*loc));
@@ -476,12 +645,11 @@ int32_t ec_loc_from_fd(xlator_t * xl, loc_t * loc, fd_t * fd)
}
}
- if (ec_loc_update(xl, loc, fd->inode, NULL) != 0) {
+ ret = ec_loc_update(xl, loc, fd->inode, NULL);
+ if (ret != 0) {
goto out;
}
- ret = 0;
-
out:
if (ret != 0) {
loc_wipe(loc);
@@ -490,9 +658,10 @@ out:
return ret;
}
-int32_t ec_loc_from_loc(xlator_t * xl, loc_t * dst, loc_t * src)
+int32_t
+ec_loc_from_loc(xlator_t *xl, loc_t *dst, loc_t *src)
{
- int32_t ret = -1;
+ int32_t ret = -ENOMEM;
memset(dst, 0, sizeof(*dst));
@@ -500,12 +669,11 @@ int32_t ec_loc_from_loc(xlator_t * xl, loc_t * dst, loc_t * src)
goto out;
}
- if (ec_loc_update(xl, dst, NULL, NULL) != 0) {
+ ret = ec_loc_update(xl, dst, NULL, NULL);
+ if (ret != 0) {
goto out;
}
- ret = 0;
-
out:
if (ret != 0) {
loc_wipe(dst);
@@ -514,50 +682,62 @@ out:
return ret;
}
-void ec_owner_set(call_frame_t * frame, void * owner)
+void
+ec_owner_set(call_frame_t *frame, void *owner)
{
set_lk_owner_from_ptr(&frame->root->lk_owner, owner);
}
-void ec_owner_copy(call_frame_t * frame, gf_lkowner_t * owner)
+void
+ec_owner_copy(call_frame_t *frame, gf_lkowner_t *owner)
{
- frame->root->lk_owner.len = owner->len;
- memcpy(frame->root->lk_owner.data, owner->data, owner->len);
+ lk_owner_copy(&frame->root->lk_owner, owner);
}
-ec_inode_t * __ec_inode_get(inode_t * inode, xlator_t * xl)
+static void
+ec_stripe_cache_init(ec_t *ec, ec_inode_t *ctx)
{
- ec_inode_t * ctx = NULL;
+ ec_stripe_list_t *stripe_cache = NULL;
+
+ stripe_cache = &(ctx->stripe_cache);
+ if (stripe_cache->max == 0) {
+ stripe_cache->max = ec->stripe_cache;
+ }
+}
+
+ec_inode_t *
+__ec_inode_get(inode_t *inode, xlator_t *xl)
+{
+ ec_inode_t *ctx = NULL;
uint64_t value = 0;
- if ((__inode_ctx_get(inode, xl, &value) != 0) || (value == 0))
- {
+ if ((__inode_ctx_get(inode, xl, &value) != 0) || (value == 0)) {
ctx = GF_MALLOC(sizeof(*ctx), ec_mt_ec_inode_t);
- if (ctx != NULL)
- {
+ if (ctx != NULL) {
memset(ctx, 0, sizeof(*ctx));
INIT_LIST_HEAD(&ctx->heal);
-
+ INIT_LIST_HEAD(&ctx->stripe_cache.lru);
+ ctx->heal_count = 0;
value = (uint64_t)(uintptr_t)ctx;
- if (__inode_ctx_set(inode, xl, &value) != 0)
- {
+ if (__inode_ctx_set(inode, xl, &value) != 0) {
GF_FREE(ctx);
return NULL;
}
}
- }
- else
- {
+ } else {
ctx = (ec_inode_t *)(uintptr_t)value;
}
+ if (ctx)
+ ec_stripe_cache_init(xl->private, ctx);
return ctx;
}
-ec_inode_t * ec_inode_get(inode_t * inode, xlator_t * xl)
+ec_inode_t *
+ec_inode_get(inode_t *inode, xlator_t *xl)
{
- ec_inode_t * ctx = NULL;
+ ec_inode_t *ctx = NULL;
LOCK(&inode->lock);
@@ -568,34 +748,47 @@ ec_inode_t * ec_inode_get(inode_t * inode, xlator_t * xl)
return ctx;
}
-ec_fd_t * __ec_fd_get(fd_t * fd, xlator_t * xl)
+ec_fd_t *
+__ec_fd_get(fd_t *fd, xlator_t *xl)
{
- ec_fd_t * ctx = NULL;
+ int i = 0;
+ ec_fd_t *ctx = NULL;
+ ec_inode_t *ictx = NULL;
uint64_t value = 0;
+ ec_t *ec = xl->private;
- if ((__fd_ctx_get(fd, xl, &value) != 0) || (value == 0))
- {
- ctx = GF_MALLOC(sizeof(*ctx), ec_mt_ec_fd_t);
- if (ctx != NULL)
- {
+ if ((__fd_ctx_get(fd, xl, &value) != 0) || (value == 0)) {
+ ctx = GF_MALLOC(sizeof(*ctx) + (sizeof(ec_fd_status_t) * ec->nodes),
+ ec_mt_ec_fd_t);
+ if (ctx != NULL) {
memset(ctx, 0, sizeof(*ctx));
+ for (i = 0; i < ec->nodes; i++) {
+ if (fd_is_anonymous(fd)) {
+ ctx->fd_status[i] = EC_FD_OPENED;
+ } else {
+ ctx->fd_status[i] = EC_FD_NOT_OPENED;
+ }
+ }
+
value = (uint64_t)(uintptr_t)ctx;
- if (__fd_ctx_set(fd, xl, value) != 0)
- {
+ if (__fd_ctx_set(fd, xl, value) != 0) {
GF_FREE(ctx);
-
return NULL;
}
+ /* Only refering bad-version so no need for lock
+ * */
+ ictx = __ec_inode_get(fd->inode, xl);
+ if (ictx) {
+ ctx->bad_version = ictx->bad_version;
+ }
}
- }
- else
- {
+ } else {
ctx = (ec_fd_t *)(uintptr_t)value;
}
/* Treat anonymous fd specially */
- if (fd->anonymous) {
+ if (fd->anonymous && ctx) {
/* Mark the fd open for all subvolumes. */
ctx->open = -1;
/* Try to populate ctx->loc with fd->inode information. */
@@ -605,9 +798,10 @@ ec_fd_t * __ec_fd_get(fd_t * fd, xlator_t * xl)
return ctx;
}
-ec_fd_t * ec_fd_get(fd_t * fd, xlator_t * xl)
+ec_fd_t *
+ec_fd_get(fd_t *fd, xlator_t *xl)
{
- ec_fd_t * ctx = NULL;
+ ec_fd_t *ctx = NULL;
LOCK(&fd->lock);
@@ -618,48 +812,56 @@ ec_fd_t * ec_fd_get(fd_t * fd, xlator_t * xl)
return ctx;
}
-uint32_t ec_adjust_offset(ec_t * ec, off_t * offset, int32_t scale)
+gf_boolean_t
+ec_is_internal_xattr(dict_t *dict, char *key, data_t *value, void *data)
{
- off_t head, tmp;
-
- tmp = *offset;
- head = tmp % ec->stripe_size;
- tmp -= head;
- if (scale)
- {
- tmp /= ec->fragments;
- }
-
- *offset = tmp;
+ if (key && (strncmp(key, EC_XATTR_PREFIX, SLEN(EC_XATTR_PREFIX)) == 0))
+ return _gf_true;
- return head;
+ return _gf_false;
}
-uint64_t ec_adjust_size(ec_t * ec, uint64_t size, int32_t scale)
+void
+ec_filter_internal_xattrs(dict_t *xattr)
{
- size += ec->stripe_size - 1;
- size -= size % ec->stripe_size;
- if (scale)
- {
- size /= ec->fragments;
- }
-
- return size;
+ dict_foreach_match(xattr, ec_is_internal_xattr, NULL,
+ dict_remove_foreach_fn, NULL);
}
gf_boolean_t
-ec_is_internal_xattr (dict_t *dict, char *key, data_t *value, void *data)
+ec_is_data_fop(glusterfs_fop_t fop)
{
- if (key &&
- (strncmp (key, EC_XATTR_PREFIX, strlen (EC_XATTR_PREFIX)) == 0))
- return _gf_true;
-
- return _gf_false;
+ switch (fop) {
+ case GF_FOP_WRITE:
+ case GF_FOP_TRUNCATE:
+ case GF_FOP_FTRUNCATE:
+ case GF_FOP_FALLOCATE:
+ case GF_FOP_DISCARD:
+ case GF_FOP_ZEROFILL:
+ return _gf_true;
+ default:
+ return _gf_false;
+ }
+ return _gf_false;
}
-
-void
-ec_filter_internal_xattrs (dict_t *xattr)
+/*
+gf_boolean_t
+ec_is_metadata_fop (int32_t lock_kind, glusterfs_fop_t fop)
{
- dict_foreach_match (xattr, ec_is_internal_xattr, NULL,
- dict_remove_foreach_fn, NULL);
-}
+ if (lock_kind == EC_LOCK_ENTRY) {
+ return _gf_false;
+ }
+
+ switch (fop) {
+ case GF_FOP_SETATTR:
+ case GF_FOP_FSETATTR:
+ case GF_FOP_SETXATTR:
+ case GF_FOP_FSETXATTR:
+ case GF_FOP_REMOVEXATTR:
+ case GF_FOP_FREMOVEXATTR:
+ return _gf_true;
+ default:
+ return _gf_false;
+ }
+ return _gf_false;
+}*/
diff --git a/xlators/cluster/ec/src/ec-helpers.h b/xlators/cluster/ec/src/ec-helpers.h
index 11d2707b3c0..015db514e05 100644
--- a/xlators/cluster/ec/src/ec-helpers.h
+++ b/xlators/cluster/ec/src/ec-helpers.h
@@ -11,48 +11,190 @@
#ifndef __EC_HELPERS_H__
#define __EC_HELPERS_H__
-#include "ec-data.h"
+#include "ec-types.h"
-const char * ec_bin(char * str, size_t size, uint64_t value, int32_t digits);
-const char * ec_fop_name(int32_t id);
-void ec_trace(const char * event, ec_fop_data_t * fop, const char * fmt, ...);
-int32_t ec_bits_count(uint64_t n);
-int32_t ec_bits_index(uint64_t n);
-int32_t ec_bits_consume(uint64_t * n);
-size_t ec_iov_copy_to(void * dst, struct iovec * vector, int32_t count,
- off_t offset, size_t size);
+#define EC_ERR(_x) ((void *)-(intptr_t)(_x))
+#define EC_IS_ERR(_x) (((uintptr_t)(_x) & ~0xfffULL) == ~0xfffULL)
+#define EC_GET_ERR(_x) ((int32_t)(intptr_t)(_x))
-int32_t ec_dict_set_number(dict_t * dict, char * key, uint64_t value);
-int32_t ec_dict_del_number(dict_t * dict, char * key, uint64_t * value);
-int32_t ec_dict_set_config(dict_t * dict, char * key, ec_config_t * config);
-int32_t ec_dict_del_config(dict_t * dict, char * key, ec_config_t * config);
+#define EC_ALIGN_CHECK(_ptr, _align) ((((uintptr_t)(_ptr)) & ((_align)-1)) == 0)
-int32_t ec_loc_parent(xlator_t *xl, loc_t *loc, loc_t *parent);
-int32_t ec_loc_update(xlator_t *xl, loc_t *loc, inode_t *inode,
- struct iatt *iatt);
+const char *
+ec_bin(char *str, size_t size, uint64_t value, int32_t digits);
+const char *
+ec_fop_name(int32_t id);
+void
+ec_trace(const char *event, ec_fop_data_t *fop, const char *fmt, ...);
+int32_t
+ec_bits_consume(uint64_t *n);
+size_t
+ec_iov_copy_to(void *dst, struct iovec *vector, int32_t count, off_t offset,
+ size_t size);
+int32_t
+ec_buffer_alloc(xlator_t *xl, size_t size, struct iobref **piobref, void **ptr);
+int32_t
+ec_dict_set_array(dict_t *dict, char *key, uint64_t *value, int32_t size);
+int32_t
+ec_dict_get_array(dict_t *dict, char *key, uint64_t value[], int32_t size);
+
+int32_t
+ec_dict_del_array(dict_t *dict, char *key, uint64_t *value, int32_t size);
+int32_t
+ec_dict_set_number(dict_t *dict, char *key, uint64_t value);
+int32_t
+ec_dict_del_number(dict_t *dict, char *key, uint64_t *value);
+int32_t
+ec_dict_set_config(dict_t *dict, char *key, ec_config_t *config);
+int32_t
+ec_dict_del_config(dict_t *dict, char *key, ec_config_t *config);
+
+int32_t
+ec_loc_parent(xlator_t *xl, loc_t *loc, loc_t *parent);
+int32_t
+ec_loc_update(xlator_t *xl, loc_t *loc, inode_t *inode, struct iatt *iatt);
+
+int32_t
+ec_loc_from_fd(xlator_t *xl, loc_t *loc, fd_t *fd);
+int32_t
+ec_loc_from_loc(xlator_t *xl, loc_t *dst, loc_t *src);
+
+void
+ec_owner_set(call_frame_t *frame, void *owner);
+void
+ec_owner_copy(call_frame_t *frame, gf_lkowner_t *owner);
+
+ec_inode_t *
+__ec_inode_get(inode_t *inode, xlator_t *xl);
+ec_inode_t *
+ec_inode_get(inode_t *inode, xlator_t *xl);
+ec_fd_t *
+__ec_fd_get(fd_t *fd, xlator_t *xl);
+ec_fd_t *
+ec_fd_get(fd_t *fd, xlator_t *xl);
+
+static inline uint32_t
+ec_adjust_size_down(ec_t *ec, uint64_t *value, gf_boolean_t scale)
+{
+ uint64_t head, tmp;
+
+ tmp = *value;
+ head = tmp % ec->stripe_size;
+ tmp -= head;
-int32_t ec_loc_from_fd(xlator_t * xl, loc_t * loc, fd_t * fd);
-int32_t ec_loc_from_loc(xlator_t * xl, loc_t * dst, loc_t * src);
+ if (scale) {
+ tmp /= ec->fragments;
+ }
-void ec_owner_set(call_frame_t * frame, void * owner);
-void ec_owner_copy(call_frame_t * frame, gf_lkowner_t * owner);
+ *value = tmp;
-ec_inode_t * __ec_inode_get(inode_t * inode, xlator_t * xl);
-ec_inode_t * ec_inode_get(inode_t * inode, xlator_t * xl);
-ec_fd_t * __ec_fd_get(fd_t * fd, xlator_t * xl);
-ec_fd_t * ec_fd_get(fd_t * fd, xlator_t * xl);
+ return (uint32_t)head;
+}
+
+/* This function can cause an overflow if the passed value is too near to the
+ * uint64_t limit. If this happens, it returns the tail in negative form and
+ * the value is set to UINT64_MAX. */
+static inline int32_t
+ec_adjust_size_up(ec_t *ec, uint64_t *value, gf_boolean_t scale)
+{
+ uint64_t tmp;
+ int32_t tail;
-uint32_t ec_adjust_offset(ec_t * ec, off_t * offset, int32_t scale);
-uint64_t ec_adjust_size(ec_t * ec, uint64_t size, int32_t scale);
+ tmp = *value;
+ /* We first adjust the value down. This never causes overflow. */
+ tail = ec_adjust_size_down(ec, &tmp, scale);
+
+ /* If the value was already aligned, tail will be 0 and nothing else
+ * needs to be done. */
+ if (tail != 0) {
+ /* Otherwise, we need to compute the real tail and adjust the
+ * returned value to the next stripe. */
+ tail = ec->stripe_size - tail;
+ if (scale) {
+ tmp += ec->fragment_size;
+ } else {
+ tmp += ec->stripe_size;
+ /* If no scaling is requested there's a possibility of
+ * overflow. */
+ if (tmp < ec->stripe_size) {
+ tmp = UINT64_MAX;
+ tail = -tail;
+ }
+ }
+ }
+
+ *value = tmp;
+
+ return tail;
+}
-static inline int32_t ec_is_power_of_2(uint32_t value)
+/* This function is equivalent to ec_adjust_size_down() but with a potentially
+ * different parameter size (off_t vs uint64_t). */
+static inline uint32_t
+ec_adjust_offset_down(ec_t *ec, off_t *value, gf_boolean_t scale)
+{
+ off_t head, tmp;
+
+ tmp = *value;
+ head = tmp % ec->stripe_size;
+ tmp -= head;
+
+ if (scale) {
+ tmp /= ec->fragments;
+ }
+
+ *value = tmp;
+
+ return (uint32_t)head;
+}
+
+/* This function is equivalent to ec_adjust_size_up() but with a potentially
+ * different parameter size (off_t vs uint64_t). */
+static inline int32_t
+ec_adjust_offset_up(ec_t *ec, off_t *value, gf_boolean_t scale)
+{
+ uint64_t tail, tmp;
+
+ /* An offset is a signed type that can only have positive values, so
+ * we take advantage of this to avoid overflows. We simply convert it
+ * to an unsigned integer and operate normally. This won't cause an
+ * overflow. Overflow is only checked when converting back to an
+ * off_t. */
+ tmp = *value;
+ tail = ec->stripe_size;
+ tail -= (tmp + tail - 1) % tail + 1;
+ tmp += tail;
+ if (scale) {
+ /* If we are scaling, we'll never get an overflow. */
+ tmp /= ec->fragments;
+ } else {
+ /* Check if there has been an overflow. */
+ if ((off_t)tmp < 0) {
+ tmp = GF_OFF_MAX;
+ tail = -tail;
+ }
+ }
+
+ *value = (off_t)tmp;
+
+ return (int32_t)tail;
+}
+
+static inline int32_t
+ec_is_power_of_2(uint32_t value)
{
return (value != 0) && ((value & (value - 1)) == 0);
}
gf_boolean_t
-ec_is_internal_xattr (dict_t *dict, char *key, data_t *value, void *data);
+ec_is_internal_xattr(dict_t *dict, char *key, data_t *value, void *data);
void
-ec_filter_internal_xattrs (dict_t *xattr);
+ec_filter_internal_xattrs(dict_t *xattr);
+
+gf_boolean_t
+ec_is_data_fop(glusterfs_fop_t fop);
+
+int32_t
+ec_launch_replace_heal(ec_t *ec);
+
#endif /* __EC_HELPERS_H__ */
diff --git a/xlators/cluster/ec/src/ec-inode-read.c b/xlators/cluster/ec/src/ec-inode-read.c
index d126d48eb12..dad5f4d7018 100644
--- a/xlators/cluster/ec/src/ec-inode-read.c
+++ b/xlators/cluster/ec/src/ec-inode-read.c
@@ -8,9 +8,8 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
-
+#include "ec.h"
+#include "ec-messages.h"
#include "ec-helpers.h"
#include "ec-common.h"
#include "ec-combine.h"
@@ -19,10 +18,12 @@
/* FOP: access */
-int32_t ec_access_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
- int32_t op_ret, int32_t op_errno, dict_t * xdata)
+int32_t
+ec_access_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
{
- ec_fop_data_t * fop = NULL;
+ ec_fop_data_t *fop = NULL;
+ ec_cbk_data_t *cbk = NULL;
int32_t idx = (int32_t)(uintptr_t)cookie;
VALIDATE_OR_GOTO(this, out);
@@ -32,28 +33,27 @@ int32_t ec_access_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
fop = frame->local;
- ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx,
- frame, op_ret, op_errno);
+ ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame,
+ op_ret, op_errno);
- if (!ec_dispatch_one_retry(fop, idx, op_ret, op_errno))
- {
- if (fop->cbks.access != NULL)
- {
- fop->cbks.access(fop->req_frame, fop, this, op_ret, op_errno,
- xdata);
- }
+ cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_ACCESS, idx, op_ret,
+ op_errno);
+ if (cbk) {
+ if (xdata)
+ cbk->xdata = dict_ref(xdata);
+ ec_combine(cbk, NULL);
}
out:
- if (fop != NULL)
- {
+ if (fop != NULL) {
ec_complete(fop);
}
return 0;
}
-void ec_wind_access(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+void
+ec_wind_access(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
{
ec_trace("WIND", fop, "idx=%d", idx);
@@ -62,75 +62,112 @@ void ec_wind_access(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
&fop->loc[0], fop->int32, fop->xdata);
}
-int32_t ec_manager_access(ec_fop_data_t * fop, int32_t state)
+int32_t
+ec_manager_access(ec_fop_data_t *fop, int32_t state)
{
- switch (state)
- {
+ ec_cbk_data_t *cbk = NULL;
+
+ switch (state) {
case EC_STATE_INIT:
+ case EC_STATE_LOCK:
+ ec_lock_prepare_inode(fop, &fop->loc[0], EC_QUERY_INFO, 0,
+ EC_RANGE_FULL);
+ ec_lock(fop);
+
+ return EC_STATE_DISPATCH;
+
case EC_STATE_DISPATCH:
ec_dispatch_one(fop);
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ if (ec_dispatch_one_retry(fop, NULL)) {
+ return EC_STATE_DISPATCH;
+ }
+
return EC_STATE_REPORT;
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+ GF_ASSERT(cbk);
+ if (fop->cbks.access != NULL) {
+ if (cbk) {
+ fop->cbks.access(fop->req_frame, fop, fop->xl, cbk->op_ret,
+ cbk->op_errno, cbk->xdata);
+ }
+ }
+ return EC_STATE_LOCK_REUSE;
+
case -EC_STATE_INIT:
+ case -EC_STATE_LOCK:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_PREPARE_ANSWER:
case -EC_STATE_REPORT:
- if (fop->cbks.access != NULL)
- {
+ if (fop->cbks.access != NULL) {
fop->cbks.access(fop->req_frame, fop, fop->xl, -1, fop->error,
NULL);
}
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_LOCK_REUSE:
+ case EC_STATE_LOCK_REUSE:
+ ec_lock_reuse(fop);
+
+ return EC_STATE_UNLOCK;
+
+ case -EC_STATE_UNLOCK:
+ case EC_STATE_UNLOCK:
+ ec_unlock(fop);
- case EC_STATE_REPORT:
return EC_STATE_END;
default:
- gf_log(fop->xl->name, GF_LOG_ERROR, "Unhandled state %d for %s",
- state, ec_fop_name(fop->id));
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
return EC_STATE_END;
}
}
-void ec_access(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_access_cbk_t func, void * data,
- loc_t * loc, int32_t mask, dict_t * xdata)
+void
+ec_access(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_access_cbk_t func, void *data, loc_t *loc,
+ int32_t mask, dict_t *xdata)
{
- ec_cbk_t callback = { .access = func };
- ec_fop_data_t * fop = NULL;
- int32_t error = EIO;
+ ec_cbk_t callback = {.access = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
- gf_log("ec", GF_LOG_TRACE, "EC(ACCESS) %p", frame);
+ gf_msg_trace("ec", 0, "EC(ACCESS) %p", frame);
VALIDATE_OR_GOTO(this, out);
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_ACCESS, 0, target, minimum,
- ec_wind_access, ec_manager_access, callback,
- data);
- if (fop == NULL)
- {
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_ACCESS, EC_FLAG_LOCK_SHARED,
+ target, fop_flags, ec_wind_access,
+ ec_manager_access, callback, data);
+ if (fop == NULL) {
goto out;
}
fop->int32 = mask;
- if (loc != NULL)
- {
- if (loc_copy(&fop->loc[0], loc) != 0)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to copy a location.");
+ if (loc != NULL) {
+ if (loc_copy(&fop->loc[0], loc) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL,
+ "Failed to copy a location.");
goto out;
}
}
- if (xdata != NULL)
- {
+ if (xdata != NULL) {
fop->xdata = dict_ref(xdata);
- if (fop->xdata == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
@@ -139,25 +176,22 @@ void ec_access(call_frame_t * frame, xlator_t * this, uintptr_t target,
error = 0;
out:
- if (fop != NULL)
- {
+ if (fop != NULL) {
ec_manager(fop, error);
- }
- else
- {
- func(frame, NULL, this, -1, EIO, NULL);
+ } else {
+ func(frame, NULL, this, -1, error, NULL);
}
}
/* FOP: getxattr */
-int32_t ec_combine_getxattr(ec_fop_data_t * fop, ec_cbk_data_t * dst,
- ec_cbk_data_t * src)
+int32_t
+ec_combine_getxattr(ec_fop_data_t *fop, ec_cbk_data_t *dst, ec_cbk_data_t *src)
{
- if (!ec_dict_compare(dst->dict, src->dict))
- {
- gf_log(fop->xl->name, GF_LOG_NOTICE, "Mismatching dictionary in "
- "answers of 'GF_FOP_GETXATTR'");
+ if (!ec_dict_compare(dst->dict, src->dict)) {
+ gf_msg(fop->xl->name, GF_LOG_NOTICE, 0, EC_MSG_DICT_MISMATCH,
+ "Mismatching dictionary in "
+ "answers of 'GF_FOP_GETXATTR'");
return 0;
}
@@ -165,12 +199,12 @@ int32_t ec_combine_getxattr(ec_fop_data_t * fop, ec_cbk_data_t * dst,
return 1;
}
-int32_t ec_getxattr_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
- int32_t op_ret, int32_t op_errno, dict_t * dict,
- dict_t * xdata)
+int32_t
+ec_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
- ec_fop_data_t * fop = NULL;
- ec_cbk_data_t * cbk = NULL;
+ ec_fop_data_t *fop = NULL;
+ ec_cbk_data_t *cbk = NULL;
int32_t idx = (int32_t)(uintptr_t)cookie;
VALIDATE_OR_GOTO(this, out);
@@ -180,34 +214,30 @@ int32_t ec_getxattr_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
fop = frame->local;
- ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx,
- frame, op_ret, op_errno);
+ ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame,
+ op_ret, op_errno);
cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_GETXATTR, idx, op_ret,
op_errno);
- if (cbk != NULL)
- {
- if (op_ret >= 0)
- {
- if (dict != NULL)
- {
+ if (cbk != NULL) {
+ if (op_ret >= 0) {
+ if (dict != NULL) {
cbk->dict = dict_ref(dict);
- if (cbk->dict == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (cbk->dict == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
}
}
- if (xdata != NULL)
- {
+ if (xdata != NULL) {
cbk->xdata = dict_ref(xdata);
- if (cbk->xdata == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (cbk->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
@@ -217,15 +247,15 @@ int32_t ec_getxattr_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
}
out:
- if (fop != NULL)
- {
+ if (fop != NULL) {
ec_complete(fop);
}
return 0;
}
-void ec_wind_getxattr(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+void
+ec_wind_getxattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
{
ec_trace("WIND", fop, "idx=%d", idx);
@@ -234,23 +264,44 @@ void ec_wind_getxattr(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
&fop->loc[0], fop->str[0], fop->xdata);
}
-int32_t ec_manager_getxattr(ec_fop_data_t * fop, int32_t state)
+void
+ec_handle_special_xattrs(ec_fop_data_t *fop)
{
- ec_cbk_data_t * cbk;
+ ec_cbk_data_t *cbk = NULL;
+ /* Stime may not be available on all the bricks, so even if some of the
+ * subvols succeed the operation, treat it as answer.*/
+ if (fop->str[0] && fnmatch(GF_XATTR_STIME_PATTERN, fop->str[0], 0) == 0) {
+ if (!fop->answer || (fop->answer->op_ret < 0)) {
+ list_for_each_entry(cbk, &fop->cbk_list, list)
+ {
+ if (cbk->op_ret >= 0) {
+ fop->answer = cbk;
+ break;
+ }
+ }
+ }
+ }
+}
+
+int32_t
+ec_manager_getxattr(ec_fop_data_t *fop, int32_t state)
+{
+ ec_cbk_data_t *cbk;
- switch (state)
- {
+ switch (state) {
case EC_STATE_INIT:
case EC_STATE_LOCK:
/* clear-locks commands must be done without any locks acquired
to avoid interferences. */
if ((fop->str[0] == NULL) ||
(strncmp(fop->str[0], GF_XATTR_CLRLK_CMD,
- strlen(GF_XATTR_CLRLK_CMD)) != 0)) {
+ SLEN(GF_XATTR_CLRLK_CMD)) != 0)) {
if (fop->fd == NULL) {
- ec_lock_prepare_inode(fop, &fop->loc[0], 0);
+ ec_lock_prepare_inode(fop, &fop->loc[0], EC_QUERY_INFO, 0,
+ EC_RANGE_FULL);
} else {
- ec_lock_prepare_fd(fop, fop->fd, 0);
+ ec_lock_prepare_fd(fop, fop->fd, EC_QUERY_INFO, 0,
+ EC_RANGE_FULL);
}
ec_lock(fop);
}
@@ -258,41 +309,35 @@ int32_t ec_manager_getxattr(ec_fop_data_t * fop, int32_t state)
return EC_STATE_DISPATCH;
case EC_STATE_DISPATCH:
- ec_dispatch_all(fop);
+ if (fop->minimum == EC_MINIMUM_ALL) {
+ ec_dispatch_all(fop);
+ } else {
+ ec_dispatch_one(fop);
+ }
return EC_STATE_PREPARE_ANSWER;
case EC_STATE_PREPARE_ANSWER:
- cbk = fop->answer;
- if (cbk != NULL)
- {
- if (!ec_dict_combine(cbk, EC_COMBINE_XDATA) ||
- ((cbk->op_ret >= 0) && !ec_dict_combine(cbk,
- EC_COMBINE_DICT)))
- {
- if (cbk->op_ret >= 0)
- {
- cbk->op_ret = -1;
- cbk->op_errno = EIO;
- }
+ ec_handle_special_xattrs(fop);
+ if (fop->minimum == EC_MINIMUM_ALL) {
+ cbk = ec_fop_prepare_answer(fop, _gf_true);
+ } else {
+ if (ec_dispatch_one_retry(fop, &cbk)) {
+ return EC_STATE_DISPATCH;
}
- if (cbk->op_ret < 0)
- {
- ec_fop_set_error(fop, cbk->op_errno);
- }
- else
- {
+ }
+ if (cbk != NULL) {
+ int32_t err;
+
+ err = ec_dict_combine(cbk, EC_COMBINE_DICT);
+ if (!ec_cbk_set_error(cbk, -err, _gf_true)) {
if (cbk->xdata != NULL)
- ec_filter_internal_xattrs (cbk->xdata);
+ ec_filter_internal_xattrs(cbk->xdata);
if (cbk->dict != NULL)
- ec_filter_internal_xattrs (cbk->dict);
+ ec_filter_internal_xattrs(cbk->dict);
}
}
- else
- {
- ec_fop_set_error(fop, EIO);
- }
return EC_STATE_REPORT;
@@ -301,8 +346,7 @@ int32_t ec_manager_getxattr(ec_fop_data_t * fop, int32_t state)
GF_ASSERT(cbk != NULL);
- if (fop->cbks.getxattr != NULL)
- {
+ if (fop->cbks.getxattr != NULL) {
fop->cbks.getxattr(fop->req_frame, fop, fop->xl, cbk->op_ret,
cbk->op_errno, cbk->dict, cbk->xdata);
}
@@ -316,10 +360,9 @@ int32_t ec_manager_getxattr(ec_fop_data_t * fop, int32_t state)
case -EC_STATE_REPORT:
GF_ASSERT(fop->error != 0);
- if (fop->cbks.getxattr != NULL)
- {
- fop->cbks.getxattr(fop->req_frame, fop, fop->xl, -1,
- fop->error, NULL, NULL);
+ if (fop->cbks.getxattr != NULL) {
+ fop->cbks.getxattr(fop->req_frame, fop, fop->xl, -1, fop->error,
+ NULL, NULL);
}
return EC_STATE_LOCK_REUSE;
@@ -337,24 +380,44 @@ int32_t ec_manager_getxattr(ec_fop_data_t * fop, int32_t state)
return EC_STATE_END;
default:
- gf_log(fop->xl->name, GF_LOG_ERROR, "Unhandled state %d for %s",
- state, ec_fop_name(fop->id));
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
return EC_STATE_END;
}
}
-int32_t ec_getxattr_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *xl,
- int32_t op_ret, int32_t op_errno, uintptr_t mask,
- uintptr_t good, uintptr_t bad, dict_t *xdata)
+int32_t
+ec_getxattr_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *xl,
+ int32_t op_ret, int32_t op_errno, uintptr_t mask,
+ uintptr_t good, uintptr_t bad, uint32_t pending,
+ dict_t *xdata)
{
- ec_fop_data_t *fop = cookie;
- fop_getxattr_cbk_t func = fop->data;
+ fop_getxattr_cbk_t func = cookie;
ec_t *ec = xl->private;
dict_t *dict = NULL;
char *str;
char bin1[65], bin2[65];
+ /* We try to return the 'pending' information in xdata, but if this cannot
+ * be set, we will ignore it silently. We prefer to report the success or
+ * failure of the heal itself. */
+ if (xdata == NULL) {
+ xdata = dict_new();
+ } else {
+ dict_ref(xdata);
+ }
+ if (xdata != NULL) {
+ if (dict_set_uint32(xdata, EC_XATTR_HEAL_NEW, pending) != 0) {
+ /* dict_set_uint32() is marked as 'warn_unused_result' and gcc
+ * enforces to check the result in this case. However we don't
+ * really care if it succeeded or not. We'll just do the same.
+ *
+ * This empty 'if' avoids the warning, and it will be removed by
+ * the optimizer. */
+ }
+ }
+
if (op_ret >= 0) {
dict = dict_new();
if (dict == NULL) {
@@ -374,7 +437,7 @@ int32_t ec_getxattr_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *xl,
goto out;
}
- if (dict_set_str(dict, EC_XATTR_HEAL, str) != 0) {
+ if (dict_set_dynstr(dict, EC_XATTR_HEAL, str) != 0) {
GF_FREE(str);
dict_unref(dict);
dict = NULL;
@@ -388,25 +451,28 @@ int32_t ec_getxattr_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *xl,
}
out:
- func(frame, NULL, xl, op_ret, op_errno, dict, NULL);
+ func(frame, NULL, xl, op_ret, op_errno, dict, xdata);
if (dict != NULL) {
dict_unref(dict);
}
+ if (xdata != NULL) {
+ dict_unref(xdata);
+ }
return 0;
}
void
-ec_getxattr (call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_getxattr_cbk_t func, void *data,
- loc_t *loc, const char *name, dict_t *xdata)
+ec_getxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_getxattr_cbk_t func, void *data, loc_t *loc,
+ const char *name, dict_t *xdata)
{
- ec_cbk_t callback = { .getxattr = func };
- ec_fop_data_t * fop = NULL;
- int32_t error = EIO;
+ ec_cbk_t callback = {.getxattr = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
- gf_log("ec", GF_LOG_TRACE, "EC(GETXATTR) %p", frame);
+ gf_msg_trace("ec", 0, "EC(GETXATTR) %p", frame);
VALIDATE_OR_GOTO(this, out);
GF_VALIDATE_OR_GOTO(this->name, frame, out);
@@ -414,47 +480,49 @@ ec_getxattr (call_frame_t *frame, xlator_t *this, uintptr_t target,
/* Special handling of an explicit self-heal request */
if ((name != NULL) && (strcmp(name, EC_XATTR_HEAL) == 0)) {
- ec_heal(frame, this, target, EC_MINIMUM_ONE, ec_getxattr_heal_cbk,
- func, loc, 0, NULL);
+ ec_heal(frame, this, target, EC_MINIMUM_ONE, ec_getxattr_heal_cbk, func,
+ loc, 0, NULL);
return;
}
- fop = ec_fop_data_allocate(frame, this, GF_FOP_GETXATTR,
- EC_FLAG_UPDATE_LOC_INODE, target, minimum,
- ec_wind_getxattr, ec_manager_getxattr, callback,
- data);
- if (fop == NULL)
- {
+ fop = ec_fop_data_allocate(
+ frame, this, GF_FOP_GETXATTR, EC_FLAG_LOCK_SHARED, target, fop_flags,
+ ec_wind_getxattr, ec_manager_getxattr, callback, data);
+ if (fop == NULL) {
goto out;
}
- if (loc != NULL)
- {
- if (loc_copy(&fop->loc[0], loc) != 0)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to copy a location.");
+ if (loc != NULL) {
+ if (loc_copy(&fop->loc[0], loc) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL,
+ "Failed to copy a location.");
goto out;
}
}
- if (name != NULL)
- {
- fop->str[0] = gf_strdup(name);
- if (fop->str[0] == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to duplicate a string.");
+ if (name != NULL) {
+ /* In case of list-node-uuids xattr, set flag to indicate
+ * the same and use node-uuid xattr for winding fop */
+ if (XATTR_IS_NODE_UUID_LIST(name)) {
+ fop->int32 = 1;
+ fop->str[0] = gf_strdup(GF_XATTR_NODE_UUID_KEY);
+ } else {
+ fop->str[0] = gf_strdup(name);
+ }
+ if (fop->str[0] == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY,
+ "Failed to duplicate a string.");
goto out;
}
}
- if (xdata != NULL)
- {
+ if (xdata != NULL) {
fop->xdata = dict_ref(xdata);
- if (fop->xdata == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
@@ -464,20 +532,20 @@ ec_getxattr (call_frame_t *frame, xlator_t *this, uintptr_t target,
out:
if (fop != NULL) {
- ec_manager (fop, error);
+ ec_manager(fop, error);
} else {
- func (frame, NULL, this, -1, error, NULL, NULL);
+ func(frame, NULL, this, -1, error, NULL, NULL);
}
}
/* FOP: fgetxattr */
-int32_t ec_fgetxattr_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
- int32_t op_ret, int32_t op_errno, dict_t * dict,
- dict_t * xdata)
+int32_t
+ec_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
- ec_fop_data_t * fop = NULL;
- ec_cbk_data_t * cbk = NULL;
+ ec_fop_data_t *fop = NULL;
+ ec_cbk_data_t *cbk = NULL;
int32_t idx = (int32_t)(uintptr_t)cookie;
VALIDATE_OR_GOTO(this, out);
@@ -487,34 +555,30 @@ int32_t ec_fgetxattr_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
fop = frame->local;
- ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx,
- frame, op_ret, op_errno);
+ ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame,
+ op_ret, op_errno);
cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_FGETXATTR, idx, op_ret,
op_errno);
- if (cbk != NULL)
- {
- if (op_ret >= 0)
- {
- if (dict != NULL)
- {
+ if (cbk != NULL) {
+ if (op_ret >= 0) {
+ if (dict != NULL) {
cbk->dict = dict_ref(dict);
- if (cbk->dict == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (cbk->dict == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
}
}
- if (xdata != NULL)
- {
+ if (xdata != NULL) {
cbk->xdata = dict_ref(xdata);
- if (cbk->xdata == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (cbk->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
@@ -524,8 +588,7 @@ int32_t ec_fgetxattr_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
}
out:
- if (fop != NULL)
- {
+ if (fop != NULL) {
ec_complete(fop);
}
@@ -533,7 +596,7 @@ out:
}
void
-ec_wind_fgetxattr (ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+ec_wind_fgetxattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
{
ec_trace("WIND", fop, "idx=%d", idx);
@@ -543,59 +606,54 @@ ec_wind_fgetxattr (ec_t *ec, ec_fop_data_t *fop, int32_t idx)
}
void
-ec_fgetxattr (call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_fgetxattr_cbk_t func, void *data,
- fd_t *fd, const char *name, dict_t *xdata)
+ec_fgetxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fgetxattr_cbk_t func, void *data, fd_t *fd,
+ const char *name, dict_t *xdata)
{
- ec_cbk_t callback = { .fgetxattr = func };
- ec_fop_data_t * fop = NULL;
- int32_t error = EIO;
+ ec_cbk_t callback = {.fgetxattr = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
- gf_log("ec", GF_LOG_TRACE, "EC(FGETXATTR) %p", frame);
+ gf_msg_trace("ec", 0, "EC(FGETXATTR) %p", frame);
VALIDATE_OR_GOTO(this, out);
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_FGETXATTR,
- EC_FLAG_UPDATE_FD_INODE, target, minimum,
- ec_wind_fgetxattr, ec_manager_getxattr,
- callback, data);
- if (fop == NULL)
- {
+ fop = ec_fop_data_allocate(
+ frame, this, GF_FOP_FGETXATTR, EC_FLAG_LOCK_SHARED, target, fop_flags,
+ ec_wind_fgetxattr, ec_manager_getxattr, callback, data);
+ if (fop == NULL) {
goto out;
}
fop->use_fd = 1;
- if (fd != NULL)
- {
+ if (fd != NULL) {
fop->fd = fd_ref(fd);
- if (fop->fd == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "file descriptor.");
+ if (fop->fd == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL,
+ "Failed to reference a "
+ "file descriptor.");
goto out;
}
}
- if (name != NULL)
- {
+ if (name != NULL) {
fop->str[0] = gf_strdup(name);
- if (fop->str[0] == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to duplicate a string.");
+ if (fop->str[0] == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY,
+ "Failed to duplicate a string.");
goto out;
}
}
- if (xdata != NULL)
- {
+ if (xdata != NULL) {
fop->xdata = dict_ref(xdata);
- if (fop->xdata == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
@@ -605,21 +663,21 @@ ec_fgetxattr (call_frame_t *frame, xlator_t *this, uintptr_t target,
out:
if (fop != NULL) {
- ec_manager (fop, error);
+ ec_manager(fop, error);
} else {
- func (frame, NULL, this, -1, error, NULL, NULL);
+ func(frame, NULL, this, -1, error, NULL, NULL);
}
}
/* FOP: open */
-int32_t ec_combine_open(ec_fop_data_t * fop, ec_cbk_data_t * dst,
- ec_cbk_data_t * src)
+int32_t
+ec_combine_open(ec_fop_data_t *fop, ec_cbk_data_t *dst, ec_cbk_data_t *src)
{
- if (dst->fd != src->fd)
- {
- gf_log(fop->xl->name, GF_LOG_NOTICE, "Mismatching fd in answers "
- "of 'GF_FOP_OPEN': %p <-> %p",
+ if (dst->fd != src->fd) {
+ gf_msg(fop->xl->name, GF_LOG_NOTICE, 0, EC_MSG_FD_MISMATCH,
+ "Mismatching fd in answers "
+ "of 'GF_FOP_OPEN': %p <-> %p",
dst->fd, src->fd);
return 0;
@@ -628,12 +686,12 @@ int32_t ec_combine_open(ec_fop_data_t * fop, ec_cbk_data_t * dst,
return 1;
}
-int32_t ec_open_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
- int32_t op_ret, int32_t op_errno, fd_t * fd,
- dict_t * xdata)
+int32_t
+ec_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- ec_fop_data_t * fop = NULL;
- ec_cbk_data_t * cbk = NULL;
+ ec_fop_data_t *fop = NULL;
+ ec_cbk_data_t *cbk = NULL;
int32_t idx = (int32_t)(uintptr_t)cookie;
VALIDATE_OR_GOTO(this, out);
@@ -643,52 +701,51 @@ int32_t ec_open_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
fop = frame->local;
- ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx,
- frame, op_ret, op_errno);
+ ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame,
+ op_ret, op_errno);
cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_OPEN, idx, op_ret,
op_errno);
- if (cbk != NULL)
- {
- if (op_ret >= 0)
- {
- if (fd != NULL)
- {
+ if (cbk != NULL) {
+ if (op_ret >= 0) {
+ if (fd != NULL) {
cbk->fd = fd_ref(fd);
- if (cbk->fd == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "file descriptor.");
+ if (cbk->fd == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ EC_MSG_FILE_DESC_REF_FAIL,
+ "Failed to reference a "
+ "file descriptor.");
goto out;
}
}
}
- if (xdata != NULL)
- {
+ if (xdata != NULL) {
cbk->xdata = dict_ref(xdata);
- if (cbk->xdata == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (cbk->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
}
ec_combine(cbk, ec_combine_open);
+
+ ec_update_fd_status(fd, this, idx, op_ret);
}
out:
- if (fop != NULL)
- {
+ if (fop != NULL) {
ec_complete(fop);
}
return 0;
}
-void ec_wind_open(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+void
+ec_wind_open(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
{
ec_trace("WIND", fop, "idx=%d", idx);
@@ -697,10 +754,10 @@ void ec_wind_open(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
&fop->loc[0], fop->int32, fop->fd, fop->xdata);
}
-int32_t ec_open_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf,
- dict_t *xdata)
+int32_t
+ec_open_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
ec_fop_data_t *fop = cookie;
int32_t error = 0;
@@ -717,25 +774,35 @@ int32_t ec_open_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
}
-int32_t ec_manager_open(ec_fop_data_t * fop, int32_t state)
+int32_t
+ec_manager_open(ec_fop_data_t *fop, int32_t state)
{
- ec_cbk_data_t * cbk;
- ec_fd_t * ctx;
+ ec_cbk_data_t *cbk;
+ ec_fd_t *ctx;
+ int32_t err;
- switch (state)
- {
+ switch (state) {
case EC_STATE_INIT:
LOCK(&fop->fd->lock);
ctx = __ec_fd_get(fop->fd, fop->xl);
- if ((ctx == NULL) ||
- (ec_loc_from_loc(fop->xl, &ctx->loc, &fop->loc[0])) != 0) {
+ if (ctx == NULL) {
UNLOCK(&fop->fd->lock);
- fop->error = EIO;
+ fop->error = ENOMEM;
return EC_STATE_REPORT;
}
+ if (!ctx->loc.inode) {
+ err = ec_loc_from_loc(fop->xl, &ctx->loc, &fop->loc[0]);
+ if (err != 0) {
+ UNLOCK(&fop->fd->lock);
+
+ fop->error = -err;
+
+ return EC_STATE_REPORT;
+ }
+ }
ctx->flags = fop->int32;
@@ -750,7 +817,7 @@ int32_t ec_manager_open(ec_fop_data_t * fop, int32_t state)
fop->uint32 = fop->int32 & O_TRUNC;
fop->int32 &= ~(O_APPEND | O_TRUNC);
- /* Fall through */
+ /* Fall through */
case EC_STATE_DISPATCH:
ec_dispatch_all(fop);
@@ -758,51 +825,33 @@ int32_t ec_manager_open(ec_fop_data_t * fop, int32_t state)
return EC_STATE_PREPARE_ANSWER;
case EC_STATE_PREPARE_ANSWER:
- cbk = fop->answer;
- if (cbk != NULL)
- {
- if (!ec_dict_combine(cbk, EC_COMBINE_XDATA))
- {
- if (cbk->op_ret >= 0)
- {
- cbk->op_ret = -1;
- cbk->op_errno = EIO;
+ cbk = ec_fop_prepare_answer(fop, _gf_true);
+ if (cbk != NULL) {
+ int32_t err;
+
+ err = ec_loc_update(fop->xl, &fop->loc[0], cbk->fd->inode,
+ NULL);
+ if (!ec_cbk_set_error(cbk, -err, _gf_true)) {
+ LOCK(&fop->fd->lock);
+
+ ctx = __ec_fd_get(fop->fd, fop->xl);
+ if (ctx != NULL) {
+ ctx->open |= cbk->mask;
}
- }
- if (cbk->op_ret >= 0) {
- if (ec_loc_update(fop->xl, &fop->loc[0], cbk->fd->inode,
- NULL) != 0) {
- cbk->op_ret = -1;
- cbk->op_errno = EIO;
- } else {
- LOCK(&fop->fd->lock);
-
- ctx = __ec_fd_get(fop->fd, fop->xl);
- if (ctx != NULL) {
- ctx->open |= cbk->mask;
- }
-
- UNLOCK(&fop->fd->lock);
-
- /* If O_TRUNC was specified, call ftruncate to
- effectively trunc the file with appropriate locks
- acquired. We don't use ctx->flags because self-heal
- can use the same fd with different flags. */
- if (fop->uint32 != 0) {
- ec_sleep(fop);
- ec_ftruncate(fop->req_frame, fop->xl, cbk->mask,
- fop->minimum, ec_open_truncate_cbk,
- fop, cbk->fd, 0, NULL);
- }
+
+ UNLOCK(&fop->fd->lock);
+
+ /* If O_TRUNC was specified, call ftruncate to
+ effectively trunc the file with appropriate locks
+ acquired. We don't use ctx->flags because self-heal
+ can use the same fd with different flags. */
+ if (fop->uint32 != 0) {
+ ec_sleep(fop);
+ ec_ftruncate(fop->req_frame, fop->xl, cbk->mask,
+ fop->minimum, ec_open_truncate_cbk, fop,
+ cbk->fd, 0, NULL);
}
}
- if (cbk->op_ret < 0) {
- ec_fop_set_error(fop, cbk->op_errno);
- }
- }
- else
- {
- ec_fop_set_error(fop, EIO);
}
return EC_STATE_REPORT;
@@ -812,8 +861,7 @@ int32_t ec_manager_open(ec_fop_data_t * fop, int32_t state)
GF_ASSERT(cbk != NULL);
- if (fop->cbks.open != NULL)
- {
+ if (fop->cbks.open != NULL) {
fop->cbks.open(fop->req_frame, fop, fop->xl, cbk->op_ret,
cbk->op_errno, cbk->fd, cbk->xdata);
}
@@ -826,8 +874,7 @@ int32_t ec_manager_open(ec_fop_data_t * fop, int32_t state)
case -EC_STATE_REPORT:
GF_ASSERT(fop->error != 0);
- if (fop->cbks.open != NULL)
- {
+ if (fop->cbks.open != NULL) {
fop->cbks.open(fop->req_frame, fop, fop->xl, -1, fop->error,
NULL, NULL);
}
@@ -835,64 +882,61 @@ int32_t ec_manager_open(ec_fop_data_t * fop, int32_t state)
return EC_STATE_END;
default:
- gf_log(fop->xl->name, GF_LOG_ERROR, "Unhandled state %d for %s",
- state, ec_fop_name(fop->id));
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
return EC_STATE_END;
}
}
-void ec_open(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_open_cbk_t func, void * data, loc_t * loc,
- int32_t flags, fd_t * fd, dict_t * xdata)
+void
+ec_open(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_open_cbk_t func, void *data, loc_t *loc,
+ int32_t flags, fd_t *fd, dict_t *xdata)
{
- ec_cbk_t callback = { .open = func };
- ec_fop_data_t * fop = NULL;
- int32_t error = EIO;
+ ec_cbk_t callback = {.open = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
- gf_log("ec", GF_LOG_TRACE, "EC(OPEN) %p", frame);
+ gf_msg_trace("ec", 0, "EC(OPEN) %p", frame);
VALIDATE_OR_GOTO(this, out);
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_OPEN, EC_FLAG_UPDATE_FD,
- target, minimum, ec_wind_open, ec_manager_open,
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_OPEN, EC_FLAG_LOCK_SHARED,
+ target, fop_flags, ec_wind_open, ec_manager_open,
callback, data);
- if (fop == NULL)
- {
+ if (fop == NULL) {
goto out;
}
fop->int32 = flags;
- if (loc != NULL)
- {
- if (loc_copy(&fop->loc[0], loc) != 0)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to copy a location.");
+ if (loc != NULL) {
+ if (loc_copy(&fop->loc[0], loc) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL,
+ "Failed to copy a location.");
goto out;
}
}
- if (fd != NULL)
- {
+ if (fd != NULL) {
fop->fd = fd_ref(fd);
- if (fop->fd == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "file descriptor.");
+ if (fop->fd == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL,
+ "Failed to reference a "
+ "file descriptor.");
goto out;
}
}
- if (xdata != NULL)
- {
+ if (xdata != NULL) {
fop->xdata = dict_ref(xdata);
- if (fop->xdata == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
@@ -901,25 +945,22 @@ void ec_open(call_frame_t * frame, xlator_t * this, uintptr_t target,
error = 0;
out:
- if (fop != NULL)
- {
+ if (fop != NULL) {
ec_manager(fop, error);
- }
- else
- {
- func(frame, NULL, this, -1, EIO, NULL, NULL);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, NULL);
}
}
/* FOP: readlink */
-int32_t ec_combine_readlink(ec_fop_data_t * fop, ec_cbk_data_t * dst,
- ec_cbk_data_t * src)
+int32_t
+ec_combine_readlink(ec_fop_data_t *fop, ec_cbk_data_t *dst, ec_cbk_data_t *src)
{
- if (!ec_iatt_combine(dst->iatt, src->iatt, 1))
- {
- gf_log(fop->xl->name, GF_LOG_NOTICE, "Mismatching iatt in "
- "answers of 'GF_FOP_READLINK'");
+ if (!ec_iatt_combine(fop, dst->iatt, src->iatt, 1)) {
+ gf_msg(fop->xl->name, GF_LOG_NOTICE, 0, EC_MSG_IATT_MISMATCH,
+ "Mismatching iatt in "
+ "answers of 'GF_FOP_READLINK'");
return 0;
}
@@ -927,11 +968,13 @@ int32_t ec_combine_readlink(ec_fop_data_t * fop, ec_cbk_data_t * dst,
return 1;
}
-int32_t ec_readlink_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
- int32_t op_ret, int32_t op_errno, const char * path,
- struct iatt * buf, dict_t * xdata)
+int32_t
+ec_readlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, const char *path,
+ struct iatt *buf, dict_t *xdata)
{
- ec_fop_data_t * fop = NULL;
+ ec_fop_data_t *fop = NULL;
+ ec_cbk_data_t *cbk = NULL;
int32_t idx = (int32_t)(uintptr_t)cookie;
VALIDATE_OR_GOTO(this, out);
@@ -941,33 +984,34 @@ int32_t ec_readlink_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
fop = frame->local;
- ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx,
- frame, op_ret, op_errno);
+ ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame,
+ op_ret, op_errno);
- if (op_ret > 0)
- {
- ec_iatt_rebuild(fop->xl->private, buf, 1, 1);
- }
+ cbk = ec_cbk_data_allocate(frame, this, fop, fop->id, idx, op_ret,
+ op_errno);
+ if (cbk) {
+ if (xdata)
+ cbk->xdata = dict_ref(xdata);
- if (!ec_dispatch_one_retry(fop, idx, op_ret, op_errno))
- {
- if (fop->cbks.readlink != NULL)
- {
- fop->cbks.readlink(fop->req_frame, fop, this, op_ret, op_errno,
- path, buf, xdata);
+ if (cbk->op_ret >= 0) {
+ cbk->iatt[0] = *buf;
+ cbk->str = gf_strdup(path);
+ if (!cbk->str) {
+ ec_cbk_set_error(cbk, ENOMEM, _gf_true);
+ }
}
+ ec_combine(cbk, NULL);
}
out:
if (fop != NULL)
- {
ec_complete(fop);
- }
return 0;
}
-void ec_wind_readlink(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+void
+ec_wind_readlink(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
{
ec_trace("WIND", fop, "idx=%d", idx);
@@ -976,75 +1020,114 @@ void ec_wind_readlink(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
&fop->loc[0], fop->size, fop->xdata);
}
-int32_t ec_manager_readlink(ec_fop_data_t * fop, int32_t state)
+int32_t
+ec_manager_readlink(ec_fop_data_t *fop, int32_t state)
{
- switch (state)
- {
+ ec_cbk_data_t *cbk = NULL;
+
+ switch (state) {
case EC_STATE_INIT:
+ case EC_STATE_LOCK:
+ ec_lock_prepare_inode(fop, &fop->loc[0], EC_QUERY_INFO, 0,
+ EC_RANGE_FULL);
+ ec_lock(fop);
+ return EC_STATE_DISPATCH;
+
case EC_STATE_DISPATCH:
ec_dispatch_one(fop);
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ if (ec_dispatch_one_retry(fop, &cbk)) {
+ return EC_STATE_DISPATCH;
+ }
+
+ if ((cbk != NULL) && (cbk->op_ret >= 0)) {
+ ec_iatt_rebuild(fop->xl->private, &cbk->iatt[0], 1, 1);
+ }
+
return EC_STATE_REPORT;
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+ GF_ASSERT(cbk);
+ if (fop->cbks.readlink != NULL) {
+ fop->cbks.readlink(fop->req_frame, fop, fop->xl, cbk->op_ret,
+ cbk->op_errno, cbk->str, &cbk->iatt[0],
+ cbk->xdata);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
case -EC_STATE_INIT:
+ case -EC_STATE_LOCK:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_PREPARE_ANSWER:
case -EC_STATE_REPORT:
- if (fop->cbks.readlink != NULL)
- {
- fop->cbks.readlink(fop->req_frame, fop, fop->xl, -1,
- fop->error, NULL, NULL, NULL);
+ if (fop->cbks.readlink != NULL) {
+ fop->cbks.readlink(fop->req_frame, fop, fop->xl, -1, fop->error,
+ NULL, NULL, NULL);
}
+ return EC_STATE_LOCK_REUSE;
- case EC_STATE_REPORT:
- return EC_STATE_END;
+ case -EC_STATE_LOCK_REUSE:
+ case EC_STATE_LOCK_REUSE:
+ ec_lock_reuse(fop);
+ return EC_STATE_UNLOCK;
+
+ case -EC_STATE_UNLOCK:
+ case EC_STATE_UNLOCK:
+ ec_unlock(fop);
+
+ return EC_STATE_END;
default:
- gf_log(fop->xl->name, GF_LOG_ERROR, "Unhandled state %d for %s",
- state, ec_fop_name(fop->id));
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
return EC_STATE_END;
}
}
-void ec_readlink(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_readlink_cbk_t func, void * data,
- loc_t * loc, size_t size, dict_t * xdata)
+void
+ec_readlink(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_readlink_cbk_t func, void *data, loc_t *loc,
+ size_t size, dict_t *xdata)
{
- ec_cbk_t callback = { .readlink = func };
- ec_fop_data_t * fop = NULL;
- int32_t error = EIO;
+ ec_cbk_t callback = {.readlink = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
- gf_log("ec", GF_LOG_TRACE, "EC(READLINK) %p", frame);
+ gf_msg_trace("ec", 0, "EC(READLINK) %p", frame);
VALIDATE_OR_GOTO(this, out);
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_READLINK, 0, target,
- minimum, ec_wind_readlink, ec_manager_readlink,
- callback, data);
- if (fop == NULL)
- {
+ fop = ec_fop_data_allocate(
+ frame, this, GF_FOP_READLINK, EC_FLAG_LOCK_SHARED, target, fop_flags,
+ ec_wind_readlink, ec_manager_readlink, callback, data);
+ if (fop == NULL) {
goto out;
}
fop->size = size;
- if (loc != NULL)
- {
- if (loc_copy(&fop->loc[0], loc) != 0)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to copy a location.");
+ if (loc != NULL) {
+ if (loc_copy(&fop->loc[0], loc) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL,
+ "Failed to copy a location.");
goto out;
}
}
- if (xdata != NULL)
- {
+ if (xdata != NULL) {
fop->xdata = dict_ref(xdata);
- if (fop->xdata == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
@@ -1053,95 +1136,82 @@ void ec_readlink(call_frame_t * frame, xlator_t * this, uintptr_t target,
error = 0;
out:
- if (fop != NULL)
- {
+ if (fop != NULL) {
ec_manager(fop, error);
- }
- else
- {
- func(frame, NULL, this, -1, EIO, NULL, NULL, NULL);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, NULL, NULL);
}
}
/* FOP: readv */
-int32_t ec_readv_rebuild(ec_t * ec, ec_fop_data_t * fop, ec_cbk_data_t * cbk)
+int32_t
+ec_readv_rebuild(ec_t *ec, ec_fop_data_t *fop, ec_cbk_data_t *cbk)
{
- ec_cbk_data_t * ans = NULL;
- struct iobref * iobref = NULL;
- struct iobuf * iobuf = NULL;
- uint8_t * buff = NULL, * ptr;
- size_t fsize = 0, size = 0, max = 0;
- int32_t i = 0;
-
- if (cbk->op_ret < 0)
- {
+ struct iovec vector[1];
+ ec_cbk_data_t *ans = NULL;
+ struct iobref *iobref = NULL;
+ void *ptr;
+ uint64_t fsize = 0, size = 0, max = 0;
+ int32_t pos, err = -ENOMEM;
+
+ if (cbk->op_ret < 0) {
+ err = -cbk->op_errno;
+
goto out;
}
- cbk->iatt[0].ia_size = fop->pre_size;
+ /* This shouldn't fail because we have the inode locked. */
+ GF_ASSERT(ec_get_inode_size(fop, fop->fd->inode, &cbk->iatt[0].ia_size));
- if (cbk->op_ret > 0)
- {
- struct iovec vector[1];
- uint8_t * blocks[cbk->count];
+ if (cbk->op_ret > 0) {
+ void *blocks[cbk->count];
uint32_t values[cbk->count];
fsize = cbk->op_ret;
size = fsize * ec->fragments;
- buff = GF_MALLOC(size, gf_common_mt_char);
- if (buff == NULL)
- {
- goto out;
- }
- ptr = buff;
- for (i = 0, ans = cbk; ans != NULL; i++, ans = ans->next)
- {
- values[i] = ans->idx;
- blocks[i] = ptr;
- ptr += ec_iov_copy_to(ptr, ans->vector, ans->int32, 0, fsize);
+ for (ans = cbk; ans != NULL; ans = ans->next) {
+ pos = gf_bits_count(cbk->mask & ((1 << ans->idx) - 1));
+ values[pos] = ans->idx + 1;
+ blocks[pos] = ans->vector[0].iov_base;
+ if ((ans->int32 != 1) ||
+ !EC_ALIGN_CHECK(blocks[pos], EC_METHOD_WORD_SIZE)) {
+ if (iobref == NULL) {
+ err = ec_buffer_alloc(ec->xl, size, &iobref, &ptr);
+ if (err != 0) {
+ goto out;
+ }
+ }
+ ec_iov_copy_to(ptr, ans->vector, ans->int32, 0, fsize);
+ blocks[pos] = ptr;
+ ptr += fsize;
+ }
}
- iobref = iobref_new();
- if (iobref == NULL)
- {
+ err = ec_buffer_alloc(ec->xl, size, &iobref, &ptr);
+ if (err != 0) {
goto out;
}
- iobuf = iobuf_get2(fop->xl->ctx->iobuf_pool, size);
- if (iobuf == NULL)
- {
- goto out;
- }
- if (iobref_add(iobref, iobuf) != 0)
- {
+
+ err = ec_method_decode(&ec->matrix, fsize, cbk->mask, values, blocks,
+ ptr);
+ if (err != 0) {
goto out;
}
- vector[0].iov_base = iobuf->ptr;
- vector[0].iov_len = ec_method_decode(fsize, ec->fragments, values,
- blocks, iobuf->ptr);
-
- iobuf_unref(iobuf);
-
- GF_FREE(buff);
- buff = NULL;
-
- vector[0].iov_base += fop->head;
- vector[0].iov_len -= fop->head;
+ vector[0].iov_base = ptr + fop->head;
+ vector[0].iov_len = size - fop->head;
max = fop->offset * ec->fragments + size;
- if (max > cbk->iatt[0].ia_size)
- {
+ if (max > cbk->iatt[0].ia_size) {
max = cbk->iatt[0].ia_size;
}
max -= fop->offset * ec->fragments + fop->head;
- if (max > fop->user_size)
- {
+ if (max > fop->user_size) {
max = fop->user_size;
}
size -= fop->head;
- if (size > max)
- {
+ if (size > max) {
vector[0].iov_len -= size - max;
size = max;
}
@@ -1154,50 +1224,36 @@ int32_t ec_readv_rebuild(ec_t * ec, ec_fop_data_t * fop, ec_cbk_data_t * cbk)
GF_FREE(cbk->vector);
cbk->vector = iov_dup(vector, 1);
- if (cbk->vector == NULL)
- {
- cbk->op_ret = -1;
- cbk->op_errno = EIO;
-
- return 0;
+ if (cbk->vector == NULL) {
+ return -ENOMEM;
}
}
- return 1;
+ return 0;
out:
- if (iobuf != NULL)
- {
- iobuf_unref(iobuf);
- }
- if (iobref != NULL)
- {
+ if (iobref != NULL) {
iobref_unref(iobref);
}
- GF_FREE(buff);
- return 0;
+ return err;
}
-int32_t ec_combine_readv(ec_fop_data_t * fop, ec_cbk_data_t * dst,
- ec_cbk_data_t * src)
+int32_t
+ec_combine_readv(ec_fop_data_t *fop, ec_cbk_data_t *dst, ec_cbk_data_t *src)
{
- if (src->dirty) {
- return 0;
- }
-
- if (!ec_vector_compare(dst->vector, dst->int32, src->vector, src->int32))
- {
- gf_log(fop->xl->name, GF_LOG_NOTICE, "Mismatching vector in "
- "answers of 'GF_FOP_READ'");
+ if (!ec_vector_compare(dst->vector, dst->int32, src->vector, src->int32)) {
+ gf_msg(fop->xl->name, GF_LOG_NOTICE, 0, EC_MSG_VECTOR_MISMATCH,
+ "Mismatching vector in "
+ "answers of 'GF_FOP_READ'");
return 0;
}
- if (!ec_iatt_combine(dst->iatt, src->iatt, 1))
- {
- gf_log(fop->xl->name, GF_LOG_NOTICE, "Mismatching iatt in "
- "answers of 'GF_FOP_READ'");
+ if (!ec_iatt_combine(fop, dst->iatt, src->iatt, 1)) {
+ gf_msg(fop->xl->name, GF_LOG_NOTICE, 0, EC_MSG_IATT_MISMATCH,
+ "Mismatching iatt in "
+ "answers of 'GF_FOP_READ'");
return 0;
}
@@ -1205,14 +1261,14 @@ int32_t ec_combine_readv(ec_fop_data_t * fop, ec_cbk_data_t * dst,
return 1;
}
-int32_t ec_readv_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
- int32_t op_ret, int32_t op_errno, struct iovec * vector,
- int32_t count, struct iatt * stbuf,
- struct iobref * iobref, dict_t * xdata)
+int32_t
+ec_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iovec *vector, int32_t count,
+ struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
{
- ec_fop_data_t * fop = NULL;
- ec_cbk_data_t * cbk = NULL;
- ec_t * ec = this->private;
+ ec_fop_data_t *fop = NULL;
+ ec_cbk_data_t *cbk = NULL;
+ ec_t *ec = this->private;
int32_t idx = (int32_t)(uintptr_t)cookie;
VALIDATE_OR_GOTO(this, out);
@@ -1222,76 +1278,68 @@ int32_t ec_readv_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
fop = frame->local;
- ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx,
- frame, op_ret, op_errno);
+ ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame,
+ op_ret, op_errno);
cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_READ, idx, op_ret,
op_errno);
- if (cbk != NULL)
- {
- if (op_ret >= 0)
- {
+ if (cbk != NULL) {
+ if (op_ret >= 0) {
cbk->int32 = count;
- if (count > 0)
- {
+ if (count > 0) {
cbk->vector = iov_dup(vector, count);
- if (cbk->vector == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to duplicate a "
- "vector list.");
+ if (cbk->vector == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY,
+ "Failed to duplicate a "
+ "vector list.");
goto out;
}
cbk->int32 = count;
}
- if (stbuf != NULL)
- {
+ if (stbuf != NULL) {
cbk->iatt[0] = *stbuf;
}
- if (iobref != NULL)
- {
+ if (iobref != NULL) {
cbk->buffers = iobref_ref(iobref);
- if (cbk->buffers == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "buffer.");
+ if (cbk->buffers == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_BUF_REF_FAIL,
+ "Failed to reference a "
+ "buffer.");
goto out;
}
}
}
- if (xdata != NULL)
- {
+ if (xdata != NULL) {
cbk->xdata = dict_ref(xdata);
- if (cbk->xdata == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (cbk->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
}
- if ((op_ret > 0) && ((op_ret % ec->fragment_size) != 0))
- {
- cbk->op_ret = -1;
- cbk->op_errno = EIO;
+ if ((op_ret > 0) && ((op_ret % ec->fragment_size) != 0)) {
+ ec_cbk_set_error(cbk, EIO, _gf_true);
}
ec_combine(cbk, ec_combine_readv);
}
out:
- if (fop != NULL)
- {
+ if (fop != NULL) {
ec_complete(fop);
}
return 0;
}
-void ec_wind_readv(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+void
+ec_wind_readv(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
{
ec_trace("WIND", fop, "idx=%d", idx);
@@ -1300,67 +1348,49 @@ void ec_wind_readv(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
fop->size, fop->offset, fop->uint32, fop->xdata);
}
-int32_t ec_manager_readv(ec_fop_data_t * fop, int32_t state)
+int32_t
+ec_manager_readv(ec_fop_data_t *fop, int32_t state)
{
- ec_cbk_data_t * cbk;
+ ec_cbk_data_t *cbk;
+ ec_t *ec = fop->xl->private;
- switch (state)
- {
+ switch (state) {
case EC_STATE_INIT:
fop->user_size = fop->size;
- fop->head = ec_adjust_offset(fop->xl->private, &fop->offset, 1);
- fop->size = ec_adjust_size(fop->xl->private, fop->size + fop->head,
- 1);
+ fop->head = ec_adjust_offset_down(fop->xl->private, &fop->offset,
+ _gf_true);
+ fop->size += fop->head;
+ ec_adjust_size_up(fop->xl->private, &fop->size, _gf_true);
- /* Fall through */
+ /* Fall through */
case EC_STATE_LOCK:
- ec_lock_prepare_fd(fop, fop->fd, 0);
+ ec_lock_prepare_fd(fop, fop->fd, EC_QUERY_INFO, fop->offset,
+ fop->size);
ec_lock(fop);
- return EC_STATE_GET_SIZE_AND_VERSION;
-
- case EC_STATE_GET_SIZE_AND_VERSION:
- ec_get_size_version(fop);
-
return EC_STATE_DISPATCH;
case EC_STATE_DISPATCH:
+ if (ec->read_mask) {
+ fop->mask &= ec->read_mask;
+ }
ec_dispatch_min(fop);
return EC_STATE_PREPARE_ANSWER;
case EC_STATE_PREPARE_ANSWER:
- cbk = fop->answer;
- if (cbk != NULL)
- {
- if (!ec_dict_combine(cbk, EC_COMBINE_XDATA))
- {
- if (cbk->op_ret >= 0)
- {
- cbk->op_ret = -1;
- cbk->op_errno = EIO;
- }
- }
- if (cbk->op_ret < 0)
- {
- ec_fop_set_error(fop, cbk->op_errno);
- }
- else
- {
- ec_iatt_rebuild(fop->xl->private, cbk->iatt, 1,
- cbk->count);
-
- if (!ec_readv_rebuild(fop->xl->private, fop, cbk))
- {
- ec_fop_set_error(fop, EIO);
- }
+ cbk = ec_fop_prepare_answer(fop, _gf_true);
+ if (cbk != NULL) {
+ int32_t err;
+
+ ec_iatt_rebuild(fop->xl->private, cbk->iatt, 1, cbk->count);
+
+ err = ec_readv_rebuild(fop->xl->private, fop, cbk);
+ if (err != 0) {
+ ec_cbk_set_error(cbk, -err, _gf_true);
}
}
- else
- {
- ec_fop_set_error(fop, EIO);
- }
return EC_STATE_REPORT;
@@ -1369,8 +1399,7 @@ int32_t ec_manager_readv(ec_fop_data_t * fop, int32_t state)
GF_ASSERT(cbk != NULL);
- if (fop->cbks.readv != NULL)
- {
+ if (fop->cbks.readv != NULL) {
fop->cbks.readv(fop->req_frame, fop, fop->xl, cbk->op_ret,
cbk->op_errno, cbk->vector, cbk->int32,
&cbk->iatt[0], cbk->buffers, cbk->xdata);
@@ -1380,14 +1409,12 @@ int32_t ec_manager_readv(ec_fop_data_t * fop, int32_t state)
case -EC_STATE_INIT:
case -EC_STATE_LOCK:
- case -EC_STATE_GET_SIZE_AND_VERSION:
case -EC_STATE_DISPATCH:
case -EC_STATE_PREPARE_ANSWER:
case -EC_STATE_REPORT:
GF_ASSERT(fop->error != 0);
- if (fop->cbks.readv != NULL)
- {
+ if (fop->cbks.readv != NULL) {
fop->cbks.readv(fop->req_frame, fop, fop->xl, -1, fop->error,
NULL, 0, NULL, NULL, NULL);
}
@@ -1407,32 +1434,32 @@ int32_t ec_manager_readv(ec_fop_data_t * fop, int32_t state)
return EC_STATE_END;
default:
- gf_log(fop->xl->name, GF_LOG_ERROR, "Unhandled state %d for %s",
- state, ec_fop_name(fop->id));
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
return EC_STATE_END;
}
}
-void ec_readv(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_readv_cbk_t func, void * data, fd_t * fd,
- size_t size, off_t offset, uint32_t flags, dict_t * xdata)
+void
+ec_readv(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_readv_cbk_t func, void *data, fd_t *fd,
+ size_t size, off_t offset, uint32_t flags, dict_t *xdata)
{
- ec_cbk_t callback = { .readv = func };
- ec_fop_data_t * fop = NULL;
- int32_t error = EIO;
+ ec_cbk_t callback = {.readv = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
- gf_log("ec", GF_LOG_TRACE, "EC(READ) %p", frame);
+ gf_msg_trace("ec", 0, "EC(READ) %p", frame);
VALIDATE_OR_GOTO(this, out);
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_READ, EC_FLAG_UPDATE_FD,
- target, minimum, ec_wind_readv,
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_READ, EC_FLAG_LOCK_SHARED,
+ target, fop_flags, ec_wind_readv,
ec_manager_readv, callback, data);
- if (fop == NULL)
- {
+ if (fop == NULL) {
goto out;
}
@@ -1442,24 +1469,22 @@ void ec_readv(call_frame_t * frame, xlator_t * this, uintptr_t target,
fop->offset = offset;
fop->uint32 = flags;
- if (fd != NULL)
- {
+ if (fd != NULL) {
fop->fd = fd_ref(fd);
- if (fop->fd == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "file descriptor.");
+ if (fop->fd == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL,
+ "Failed to reference a "
+ "file descriptor.");
goto out;
}
}
- if (xdata != NULL)
- {
+ if (xdata != NULL) {
fop->xdata = dict_ref(xdata);
- if (fop->xdata == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
@@ -1468,25 +1493,227 @@ void ec_readv(call_frame_t * frame, xlator_t * this, uintptr_t target,
error = 0;
out:
- if (fop != NULL)
- {
+ if (fop != NULL) {
ec_manager(fop, error);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, 0, NULL, NULL, NULL);
+ }
+}
+
+/* FOP: seek */
+
+int32_t
+ec_seek_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, off_t offset, dict_t *xdata)
+{
+ ec_fop_data_t *fop = NULL;
+ ec_cbk_data_t *cbk = NULL;
+ ec_t *ec = this->private;
+ int32_t idx = (int32_t)(uintptr_t)cookie;
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = frame->local;
+
+ ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame,
+ op_ret, op_errno);
+
+ cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_SEEK, idx, op_ret,
+ op_errno);
+ if (cbk != NULL) {
+ if (op_ret >= 0) {
+ cbk->offset = offset;
+ }
+ if (xdata != NULL) {
+ cbk->xdata = dict_ref(xdata);
+ }
+
+ if ((op_ret > 0) && ((cbk->offset % ec->fragment_size) != 0)) {
+ cbk->op_ret = -1;
+ cbk->op_errno = EIO;
+ }
+
+ ec_combine(cbk, NULL);
}
- else
- {
- func(frame, NULL, this, -1, EIO, NULL, 0, NULL, NULL, NULL);
+
+out:
+ if (fop != NULL) {
+ ec_complete(fop);
+ }
+
+ return 0;
+}
+
+void
+ec_wind_seek(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_seek_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->seek, fop->fd,
+ fop->offset, fop->seek, fop->xdata);
+}
+
+int32_t
+ec_manager_seek(ec_fop_data_t *fop, int32_t state)
+{
+ ec_cbk_data_t *cbk;
+ uint64_t size;
+
+ switch (state) {
+ case EC_STATE_INIT:
+ fop->user_size = fop->offset;
+ fop->head = ec_adjust_offset_down(fop->xl->private, &fop->offset,
+ _gf_true);
+
+ /* Fall through */
+
+ case EC_STATE_LOCK:
+ ec_lock_prepare_fd(fop, fop->fd, EC_QUERY_INFO, fop->offset,
+ EC_RANGE_FULL);
+ ec_lock(fop);
+
+ return EC_STATE_DISPATCH;
+
+ case EC_STATE_DISPATCH:
+ /* This shouldn't fail because we have the inode locked. */
+ GF_ASSERT(
+ ec_get_inode_size(fop, fop->locks[0].lock->loc.inode, &size));
+
+ if (fop->user_size >= size) {
+ ec_fop_set_error(fop, ENXIO);
+
+ return EC_STATE_REPORT;
+ }
+
+ ec_dispatch_one(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ if (ec_dispatch_one_retry(fop, &cbk)) {
+ return EC_STATE_DISPATCH;
+ }
+ if ((cbk != NULL) && (cbk->op_ret >= 0)) {
+ ec_t *ec = fop->xl->private;
+
+ /* This shouldn't fail because we have the inode locked. */
+ GF_ASSERT(ec_get_inode_size(fop, fop->locks[0].lock->loc.inode,
+ &size));
+
+ cbk->offset *= ec->fragments;
+ if (cbk->offset < fop->user_size) {
+ cbk->offset = fop->user_size;
+ }
+ if (cbk->offset > size) {
+ cbk->offset = size;
+ }
+ }
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->cbks.seek != NULL) {
+ fop->cbks.seek(fop->req_frame, fop, fop->xl, cbk->op_ret,
+ cbk->op_errno, cbk->offset, cbk->xdata);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_INIT:
+ case -EC_STATE_LOCK:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_REPORT:
+ GF_ASSERT(fop->error != 0);
+
+ if (fop->cbks.seek != NULL) {
+ fop->cbks.seek(fop->req_frame, fop, fop->xl, -1, fop->error, 0,
+ NULL);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_LOCK_REUSE:
+ case EC_STATE_LOCK_REUSE:
+ ec_lock_reuse(fop);
+
+ return EC_STATE_UNLOCK;
+
+ case -EC_STATE_UNLOCK:
+ case EC_STATE_UNLOCK:
+ ec_unlock(fop);
+
+ return EC_STATE_END;
+
+ default:
+ gf_msg(fop->xl->name, GF_LOG_ERROR, 0, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+void
+ec_seek(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_seek_cbk_t func, void *data, fd_t *fd,
+ off_t offset, gf_seek_what_t what, dict_t *xdata)
+{
+ ec_cbk_t callback = {.seek = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = EIO;
+
+ gf_msg_trace("ec", 0, "EC(SEEK) %p", frame);
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_SEEK, EC_FLAG_LOCK_SHARED,
+ target, fop_flags, ec_wind_seek, ec_manager_seek,
+ callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+
+ fop->use_fd = 1;
+
+ fop->offset = offset;
+ fop->seek = what;
+
+ if (fd != NULL) {
+ fop->fd = fd_ref(fd);
+ }
+ if (xdata != NULL) {
+ fop->xdata = dict_ref(xdata);
+ }
+
+ error = 0;
+
+out:
+ if (fop != NULL) {
+ ec_manager(fop, error);
+ } else {
+ func(frame, NULL, this, -1, EIO, 0, NULL);
}
}
/* FOP: stat */
-int32_t ec_combine_stat(ec_fop_data_t * fop, ec_cbk_data_t * dst,
- ec_cbk_data_t * src)
+int32_t
+ec_combine_stat(ec_fop_data_t *fop, ec_cbk_data_t *dst, ec_cbk_data_t *src)
{
- if (!ec_iatt_combine(dst->iatt, src->iatt, 1))
- {
- gf_log(fop->xl->name, GF_LOG_NOTICE, "Mismatching iatt in "
- "answers of 'GF_FOP_STAT'");
+ if (!ec_iatt_combine(fop, dst->iatt, src->iatt, 1)) {
+ gf_msg(fop->xl->name, GF_LOG_NOTICE, 0, EC_MSG_IATT_MISMATCH,
+ "Mismatching iatt in "
+ "answers of 'GF_FOP_STAT'");
return 0;
}
@@ -1494,12 +1721,12 @@ int32_t ec_combine_stat(ec_fop_data_t * fop, ec_cbk_data_t * dst,
return 1;
}
-int32_t ec_stat_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
- int32_t op_ret, int32_t op_errno, struct iatt * buf,
- dict_t * xdata)
+int32_t
+ec_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
- ec_fop_data_t * fop = NULL;
- ec_cbk_data_t * cbk = NULL;
+ ec_fop_data_t *fop = NULL;
+ ec_cbk_data_t *cbk = NULL;
int32_t idx = (int32_t)(uintptr_t)cookie;
VALIDATE_OR_GOTO(this, out);
@@ -1509,27 +1736,23 @@ int32_t ec_stat_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
fop = frame->local;
- ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx,
- frame, op_ret, op_errno);
+ ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame,
+ op_ret, op_errno);
cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_STAT, idx, op_ret,
op_errno);
- if (cbk != NULL)
- {
- if (op_ret >= 0)
- {
- if (buf != NULL)
- {
+ if (cbk != NULL) {
+ if (op_ret >= 0) {
+ if (buf != NULL) {
cbk->iatt[0] = *buf;
}
}
- if (xdata != NULL)
- {
+ if (xdata != NULL) {
cbk->xdata = dict_ref(xdata);
- if (cbk->xdata == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (cbk->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
@@ -1539,15 +1762,15 @@ int32_t ec_stat_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
}
out:
- if (fop != NULL)
- {
+ if (fop != NULL) {
ec_complete(fop);
}
return 0;
}
-void ec_wind_stat(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+void
+ec_wind_stat(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
{
ec_trace("WIND", fop, "idx=%d", idx);
@@ -1556,29 +1779,23 @@ void ec_wind_stat(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
&fop->loc[0], fop->xdata);
}
-int32_t ec_manager_stat(ec_fop_data_t * fop, int32_t state)
+int32_t
+ec_manager_stat(ec_fop_data_t *fop, int32_t state)
{
- ec_cbk_data_t * cbk;
+ ec_cbk_data_t *cbk;
- switch (state)
- {
+ switch (state) {
case EC_STATE_INIT:
case EC_STATE_LOCK:
- if (fop->fd == NULL)
- {
- ec_lock_prepare_inode(fop, &fop->loc[0], 0);
- }
- else
- {
- ec_lock_prepare_fd(fop, fop->fd, 0);
+ if (fop->fd == NULL) {
+ ec_lock_prepare_inode(fop, &fop->loc[0], EC_QUERY_INFO, 0,
+ EC_RANGE_FULL);
+ } else {
+ ec_lock_prepare_fd(fop, fop->fd, EC_QUERY_INFO, 0,
+ EC_RANGE_FULL);
}
ec_lock(fop);
- return EC_STATE_GET_SIZE_AND_VERSION;
-
- case EC_STATE_GET_SIZE_AND_VERSION:
- ec_get_size_version(fop);
-
return EC_STATE_DISPATCH;
case EC_STATE_DISPATCH:
@@ -1587,33 +1804,18 @@ int32_t ec_manager_stat(ec_fop_data_t * fop, int32_t state)
return EC_STATE_PREPARE_ANSWER;
case EC_STATE_PREPARE_ANSWER:
- cbk = fop->answer;
- if (cbk != NULL)
- {
- if (!ec_dict_combine(cbk, EC_COMBINE_XDATA))
- {
- if (cbk->op_ret >= 0)
- {
- cbk->op_ret = -1;
- cbk->op_errno = EIO;
- }
- }
- if (cbk->op_ret < 0)
- {
- ec_fop_set_error(fop, cbk->op_errno);
- }
- else
- {
- ec_iatt_rebuild(fop->xl->private, cbk->iatt, 1,
- cbk->count);
+ cbk = ec_fop_prepare_answer(fop, _gf_true);
- cbk->iatt[0].ia_size = fop->pre_size;
+ if (cbk != NULL) {
+ if (cbk->iatt[0].ia_type == IA_IFREG) {
+ ec_iatt_rebuild(fop->xl->private, cbk->iatt, 1, cbk->count);
+
+ /* This shouldn't fail because we have the inode locked. */
+ GF_ASSERT(ec_get_inode_size(fop,
+ fop->locks[0].lock->loc.inode,
+ &cbk->iatt[0].ia_size));
}
}
- else
- {
- ec_fop_set_error(fop, EIO);
- }
return EC_STATE_REPORT;
@@ -1622,18 +1824,13 @@ int32_t ec_manager_stat(ec_fop_data_t * fop, int32_t state)
GF_ASSERT(cbk != NULL);
- if (fop->id == GF_FOP_STAT)
- {
- if (fop->cbks.stat != NULL)
- {
+ if (fop->id == GF_FOP_STAT) {
+ if (fop->cbks.stat != NULL) {
fop->cbks.stat(fop->req_frame, fop, fop->xl, cbk->op_ret,
cbk->op_errno, &cbk->iatt[0], cbk->xdata);
}
- }
- else
- {
- if (fop->cbks.fstat != NULL)
- {
+ } else {
+ if (fop->cbks.fstat != NULL) {
fop->cbks.fstat(fop->req_frame, fop, fop->xl, cbk->op_ret,
cbk->op_errno, &cbk->iatt[0], cbk->xdata);
}
@@ -1643,24 +1840,18 @@ int32_t ec_manager_stat(ec_fop_data_t * fop, int32_t state)
case -EC_STATE_INIT:
case -EC_STATE_LOCK:
- case -EC_STATE_GET_SIZE_AND_VERSION:
case -EC_STATE_DISPATCH:
case -EC_STATE_PREPARE_ANSWER:
case -EC_STATE_REPORT:
GF_ASSERT(fop->error != 0);
- if (fop->id == GF_FOP_STAT)
- {
- if (fop->cbks.stat != NULL)
- {
- fop->cbks.stat(fop->req_frame, fop, fop->xl, -1,
- fop->error, NULL, NULL);
+ if (fop->id == GF_FOP_STAT) {
+ if (fop->cbks.stat != NULL) {
+ fop->cbks.stat(fop->req_frame, fop, fop->xl, -1, fop->error,
+ NULL, NULL);
}
- }
- else
- {
- if (fop->cbks.fstat != NULL)
- {
+ } else {
+ if (fop->cbks.fstat != NULL) {
fop->cbks.fstat(fop->req_frame, fop, fop->xl, -1,
fop->error, NULL, NULL);
}
@@ -1681,51 +1872,49 @@ int32_t ec_manager_stat(ec_fop_data_t * fop, int32_t state)
return EC_STATE_END;
default:
- gf_log(fop->xl->name, GF_LOG_ERROR, "Unhandled state %d for %s",
- state, ec_fop_name(fop->id));
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
return EC_STATE_END;
}
}
-void ec_stat(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_stat_cbk_t func, void * data, loc_t * loc,
- dict_t * xdata)
+void
+ec_stat(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_stat_cbk_t func, void *data, loc_t *loc,
+ dict_t *xdata)
{
- ec_cbk_t callback = { .stat = func };
- ec_fop_data_t * fop = NULL;
- int32_t error = EIO;
+ ec_cbk_t callback = {.stat = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
- gf_log("ec", GF_LOG_TRACE, "EC(STAT) %p", frame);
+ gf_msg_trace("ec", 0, "EC(STAT) %p", frame);
VALIDATE_OR_GOTO(this, out);
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_STAT,
- EC_FLAG_UPDATE_LOC_INODE, target, minimum,
- ec_wind_stat, ec_manager_stat, callback, data);
- if (fop == NULL)
- {
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_STAT, EC_FLAG_LOCK_SHARED,
+ target, fop_flags, ec_wind_stat, ec_manager_stat,
+ callback, data);
+ if (fop == NULL) {
goto out;
}
- if (loc != NULL)
- {
- if (loc_copy(&fop->loc[0], loc) != 0)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to copy a location.");
+ if (loc != NULL) {
+ if (loc_copy(&fop->loc[0], loc) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL,
+ "Failed to copy a location.");
goto out;
}
}
- if (xdata != NULL)
- {
+ if (xdata != NULL) {
fop->xdata = dict_ref(xdata);
- if (fop->xdata == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
@@ -1734,24 +1923,21 @@ void ec_stat(call_frame_t * frame, xlator_t * this, uintptr_t target,
error = 0;
out:
- if (fop != NULL)
- {
+ if (fop != NULL) {
ec_manager(fop, error);
- }
- else
- {
- func(frame, NULL, this, -1, EIO, NULL, NULL);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, NULL);
}
}
/* FOP: fstat */
-int32_t ec_fstat_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
- int32_t op_ret, int32_t op_errno, struct iatt * buf,
- dict_t * xdata)
+int32_t
+ec_fstat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
- ec_fop_data_t * fop = NULL;
- ec_cbk_data_t * cbk = NULL;
+ ec_fop_data_t *fop = NULL;
+ ec_cbk_data_t *cbk = NULL;
int32_t idx = (int32_t)(uintptr_t)cookie;
VALIDATE_OR_GOTO(this, out);
@@ -1761,27 +1947,23 @@ int32_t ec_fstat_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
fop = frame->local;
- ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx,
- frame, op_ret, op_errno);
+ ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame,
+ op_ret, op_errno);
cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_FSTAT, idx, op_ret,
op_errno);
- if (cbk != NULL)
- {
- if (op_ret >= 0)
- {
- if (buf != NULL)
- {
+ if (cbk != NULL) {
+ if (op_ret >= 0) {
+ if (buf != NULL) {
cbk->iatt[0] = *buf;
}
}
- if (xdata != NULL)
- {
+ if (xdata != NULL) {
cbk->xdata = dict_ref(xdata);
- if (cbk->xdata == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (cbk->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
@@ -1791,15 +1973,15 @@ int32_t ec_fstat_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
}
out:
- if (fop != NULL)
- {
+ if (fop != NULL) {
ec_complete(fop);
}
return 0;
}
-void ec_wind_fstat(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+void
+ec_wind_fstat(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
{
ec_trace("WIND", fop, "idx=%d", idx);
@@ -1808,48 +1990,46 @@ void ec_wind_fstat(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
fop->xdata);
}
-void ec_fstat(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_fstat_cbk_t func, void * data, fd_t * fd,
- dict_t * xdata)
+void
+ec_fstat(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fstat_cbk_t func, void *data, fd_t *fd,
+ dict_t *xdata)
{
- ec_cbk_t callback = { .fstat = func };
- ec_fop_data_t * fop = NULL;
- int32_t error = EIO;
+ ec_cbk_t callback = {.fstat = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
- gf_log("ec", GF_LOG_TRACE, "EC(FSTAT) %p", frame);
+ gf_msg_trace("ec", 0, "EC(FSTAT) %p", frame);
VALIDATE_OR_GOTO(this, out);
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_FSTAT,
- EC_FLAG_UPDATE_FD_INODE, target, minimum,
- ec_wind_fstat, ec_manager_stat, callback, data);
- if (fop == NULL)
- {
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_FSTAT, EC_FLAG_LOCK_SHARED,
+ target, fop_flags, ec_wind_fstat,
+ ec_manager_stat, callback, data);
+ if (fop == NULL) {
goto out;
}
fop->use_fd = 1;
- if (fd != NULL)
- {
+ if (fd != NULL) {
fop->fd = fd_ref(fd);
- if (fop->fd == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "file descriptor.");
+ if (fop->fd == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL,
+ "Failed to reference a "
+ "file descriptor.");
goto out;
}
}
- if (xdata != NULL)
- {
+ if (xdata != NULL) {
fop->xdata = dict_ref(xdata);
- if (fop->xdata == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
@@ -1858,12 +2038,9 @@ void ec_fstat(call_frame_t * frame, xlator_t * this, uintptr_t target,
error = 0;
out:
- if (fop != NULL)
- {
+ if (fop != NULL) {
ec_manager(fop, error);
- }
- else
- {
- func(frame, NULL, this, -1, EIO, NULL, NULL);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, NULL);
}
}
diff --git a/xlators/cluster/ec/src/ec-inode-write.c b/xlators/cluster/ec/src/ec-inode-write.c
index 6b485a26fbc..9b5fe2a7fdc 100644
--- a/xlators/cluster/ec/src/ec-inode-write.c
+++ b/xlators/cluster/ec/src/ec-inode-write.c
@@ -8,72 +8,163 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
-
+#include "ec-messages.h"
#include "ec-helpers.h"
#include "ec-common.h"
#include "ec-combine.h"
#include "ec-method.h"
#include "ec-fops.h"
+#include "ec-mem-types.h"
+
+int32_t
+ec_update_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ ec_fop_data_t *fop = cookie;
+ ec_cbk_data_t *cbk = NULL;
+ ec_fop_data_t *parent = fop->parent;
+ int i = 0;
+
+ ec_trace("UPDATE_WRITEV_CBK", cookie, "ret=%d, errno=%d, parent-fop=%s",
+ op_ret, op_errno, ec_fop_name(parent->id));
+
+ if (op_ret < 0) {
+ ec_fop_set_error(parent, op_errno);
+ goto out;
+ }
+ cbk = ec_cbk_data_allocate(parent->frame, this, parent, parent->id, 0,
+ op_ret, op_errno);
+ if (!cbk) {
+ ec_fop_set_error(parent, ENOMEM);
+ goto out;
+ }
+
+ if (xdata)
+ cbk->xdata = dict_ref(xdata);
+
+ if (prebuf)
+ cbk->iatt[i++] = *prebuf;
+
+ if (postbuf)
+ cbk->iatt[i++] = *postbuf;
+
+ LOCK(&parent->lock);
+ {
+ parent->good &= fop->good;
+
+ if (gf_bits_count(parent->good) < parent->minimum) {
+ __ec_fop_set_error(parent, EIO);
+ } else if (fop->error == 0 && parent->answer == NULL) {
+ parent->answer = cbk;
+ }
+ }
+ UNLOCK(&parent->lock);
+out:
+ return 0;
+}
+
+static int32_t
+ec_update_write(ec_fop_data_t *fop, uintptr_t mask, off_t offset, uint64_t size)
+{
+ struct iobref *iobref = NULL;
+ struct iobuf *iobuf = NULL;
+ struct iovec vector;
+ int32_t err = -ENOMEM;
+
+ iobref = iobref_new();
+ if (iobref == NULL) {
+ goto out;
+ }
+ iobuf = iobuf_get(fop->xl->ctx->iobuf_pool);
+ if (iobuf == NULL) {
+ goto out;
+ }
+ err = iobref_add(iobref, iobuf);
+ if (err != 0) {
+ goto out;
+ }
+
+ if (fop->locks[0].lock)
+ ec_lock_update_good(fop->locks[0].lock, fop);
+ vector.iov_base = iobuf->ptr;
+ vector.iov_len = size;
+ memset(vector.iov_base, 0, vector.iov_len);
+
+ ec_writev(fop->frame, fop->xl, mask, fop->minimum, ec_update_writev_cbk,
+ NULL, fop->fd, &vector, 1, offset, 0, iobref, NULL);
+
+ err = 0;
+
+out:
+ if (iobuf != NULL) {
+ iobuf_unref(iobuf);
+ }
+ if (iobref != NULL) {
+ iobref_unref(iobref);
+ }
+
+ return err;
+}
int
-ec_inode_write_cbk (call_frame_t *frame, xlator_t *this, void *cookie,
- int op_ret, int op_errno, struct iatt *prestat,
- struct iatt *poststat, dict_t *xdata)
+ec_inode_write_cbk(call_frame_t *frame, xlator_t *this, void *cookie,
+ int op_ret, int op_errno, struct iatt *prestat,
+ struct iatt *poststat, dict_t *xdata)
{
- ec_fop_data_t *fop = NULL;
- ec_cbk_data_t *cbk = NULL;
- int i = 0;
- int idx = 0;
+ ec_fop_data_t *fop = NULL;
+ ec_cbk_data_t *cbk = NULL;
+ int i = 0;
+ int idx = 0;
- VALIDATE_OR_GOTO (this, out);
- GF_VALIDATE_OR_GOTO (this->name, frame, out);
- GF_VALIDATE_OR_GOTO (this->name, frame->local, out);
- GF_VALIDATE_OR_GOTO (this->name, this->private, out);
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = frame->local;
- idx = (int32_t)(uintptr_t) cookie;
+ fop = frame->local;
+ idx = (int32_t)(uintptr_t)cookie;
- ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx,
- frame, op_ret, op_errno);
+ ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame,
+ op_ret, op_errno);
- cbk = ec_cbk_data_allocate (frame, this, fop, fop->id, idx, op_ret,
- op_errno);
- if (!cbk)
- goto out;
+ cbk = ec_cbk_data_allocate(frame, this, fop, fop->id, idx, op_ret,
+ op_errno);
+ if (!cbk)
+ goto out;
- if (op_ret < 0)
- goto out;
+ if (op_ret < 0)
+ goto out;
- if (xdata)
- cbk->xdata = dict_ref (xdata);
+ if (xdata)
+ cbk->xdata = dict_ref(xdata);
- if (prestat)
- cbk->iatt[i++] = *prestat;
+ if (prestat)
+ cbk->iatt[i++] = *prestat;
- if (poststat)
- cbk->iatt[i++] = *poststat;
+ if (poststat)
+ cbk->iatt[i++] = *poststat;
out:
- if (cbk)
- ec_combine (cbk, ec_combine_write);
+ if (cbk)
+ ec_combine(cbk, ec_combine_write);
- if (fop)
- ec_complete (fop);
- return 0;
+ if (fop)
+ ec_complete(fop);
+ return 0;
}
/* FOP: removexattr */
-int32_t ec_removexattr_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *xdata)
+int32_t
+ec_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- return ec_inode_write_cbk (frame, this, cookie, op_ret, op_errno,
- NULL, NULL, xdata);
+ return ec_inode_write_cbk(frame, this, cookie, op_ret, op_errno, NULL, NULL,
+ xdata);
}
-void ec_wind_removexattr(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+void
+ec_wind_removexattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
{
ec_trace("WIND", fop, "idx=%d", idx);
@@ -83,170 +174,154 @@ void ec_wind_removexattr(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
}
void
-ec_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xdata)
+ec_xattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
{
- ec_fop_data_t *fop = cookie;
- switch (fop->id) {
+ ec_fop_data_t *fop = cookie;
+ switch (fop->id) {
case GF_FOP_SETXATTR:
- if (fop->cbks.setxattr) {
- fop->cbks.setxattr (frame, cookie, this, op_ret,
- op_errno, xdata);
- }
- break;
+ if (fop->cbks.setxattr) {
+ QUORUM_CBK(fop->cbks.setxattr, fop, frame, cookie, this, op_ret,
+ op_errno, xdata);
+ }
+ break;
case GF_FOP_REMOVEXATTR:
- if (fop->cbks.removexattr) {
- fop->cbks.removexattr (frame, cookie, this, op_ret,
- op_errno, xdata);
- }
- break;
+ if (fop->cbks.removexattr) {
+ QUORUM_CBK(fop->cbks.removexattr, fop, frame, cookie, this,
+ op_ret, op_errno, xdata);
+ }
+ break;
case GF_FOP_FSETXATTR:
- if (fop->cbks.fsetxattr) {
- fop->cbks.fsetxattr (frame, cookie, this, op_ret,
- op_errno, xdata);
- }
- break;
+ if (fop->cbks.fsetxattr) {
+ QUORUM_CBK(fop->cbks.fsetxattr, fop, frame, cookie, this,
+ op_ret, op_errno, xdata);
+ }
+ break;
case GF_FOP_FREMOVEXATTR:
- if (fop->cbks.fremovexattr) {
- fop->cbks.fremovexattr (frame, cookie, this, op_ret,
- op_errno, xdata);
- }
- break;
- }
+ if (fop->cbks.fremovexattr) {
+ QUORUM_CBK(fop->cbks.fremovexattr, fop, frame, cookie, this,
+ op_ret, op_errno, xdata);
+ }
+ break;
+ }
}
int32_t
-ec_manager_xattr (ec_fop_data_t *fop, int32_t state)
+ec_manager_xattr(ec_fop_data_t *fop, int32_t state)
{
- ec_cbk_data_t * cbk;
+ ec_cbk_data_t *cbk;
- switch (state) {
+ switch (state) {
case EC_STATE_INIT:
case EC_STATE_LOCK:
- if (fop->fd == NULL)
- ec_lock_prepare_inode(fop, &fop->loc[0], 1);
- else
- ec_lock_prepare_fd(fop, fop->fd, 1);
-
- ec_lock(fop);
+ if (fop->fd == NULL) {
+ ec_lock_prepare_inode(fop, &fop->loc[0],
+ EC_UPDATE_META | EC_QUERY_INFO, 0,
+ EC_RANGE_FULL);
+ } else {
+ ec_lock_prepare_fd(fop, fop->fd, EC_UPDATE_META | EC_QUERY_INFO,
+ 0, EC_RANGE_FULL);
+ }
+ ec_lock(fop);
- return EC_STATE_DISPATCH;
+ return EC_STATE_DISPATCH;
case EC_STATE_DISPATCH:
- ec_dispatch_all(fop);
+ ec_dispatch_all(fop);
- return EC_STATE_PREPARE_ANSWER;
+ return EC_STATE_PREPARE_ANSWER;
case EC_STATE_PREPARE_ANSWER:
- cbk = fop->answer;
- if (cbk) {
- if (!ec_dict_combine (cbk, EC_COMBINE_XDATA)) {
- if (cbk->op_ret >= 0) {
- cbk->op_ret = -1;
- cbk->op_errno = EIO;
- }
- }
-
- if (cbk->op_ret < 0)
- ec_fop_set_error(fop, cbk->op_errno);
- } else {
- ec_fop_set_error(fop, EIO);
- }
+ ec_fop_prepare_answer(fop, _gf_false);
- return EC_STATE_REPORT;
+ return EC_STATE_REPORT;
case EC_STATE_REPORT:
- cbk = fop->answer;
+ cbk = fop->answer;
- GF_ASSERT(cbk != NULL);
+ GF_ASSERT(cbk != NULL);
- ec_xattr_cbk (fop->req_frame, fop, fop->xl, cbk->op_ret,
- cbk->op_errno, cbk->xdata);
+ ec_xattr_cbk(fop->req_frame, fop, fop->xl, cbk->op_ret,
+ cbk->op_errno, cbk->xdata);
- return EC_STATE_LOCK_REUSE;
+ return EC_STATE_LOCK_REUSE;
case -EC_STATE_INIT:
case -EC_STATE_LOCK:
case -EC_STATE_DISPATCH:
case -EC_STATE_PREPARE_ANSWER:
case -EC_STATE_REPORT:
- GF_ASSERT(fop->error != 0);
+ GF_ASSERT(fop->error != 0);
- ec_xattr_cbk (fop->req_frame, fop, fop->xl, -1, fop->error,
- NULL);
+ ec_xattr_cbk(fop->req_frame, fop, fop->xl, -1, fop->error, NULL);
- return EC_STATE_LOCK_REUSE;
+ return EC_STATE_LOCK_REUSE;
case -EC_STATE_LOCK_REUSE:
case EC_STATE_LOCK_REUSE:
- ec_lock_reuse(fop);
+ ec_lock_reuse(fop);
- return EC_STATE_UNLOCK;
+ return EC_STATE_UNLOCK;
case -EC_STATE_UNLOCK:
case EC_STATE_UNLOCK:
- ec_unlock(fop);
+ ec_unlock(fop);
- return EC_STATE_END;
+ return EC_STATE_END;
default:
- gf_log(fop->xl->name, GF_LOG_ERROR, "Unhandled state %d for %s",
- state, ec_fop_name(fop->id));
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
- return EC_STATE_END;
- }
+ return EC_STATE_END;
+ }
}
void
-ec_removexattr (call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_removexattr_cbk_t func, void *data,
- loc_t *loc, const char *name, dict_t *xdata)
+ec_removexattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_removexattr_cbk_t func, void *data,
+ loc_t *loc, const char *name, dict_t *xdata)
{
- ec_cbk_t callback = { .removexattr = func };
- ec_fop_data_t * fop = NULL;
- int32_t error = EIO;
+ ec_cbk_t callback = {.removexattr = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
- gf_log("ec", GF_LOG_TRACE, "EC(REMOVEXATTR) %p", frame);
+ gf_msg_trace("ec", 0, "EC(REMOVEXATTR) %p", frame);
- VALIDATE_OR_GOTO (this, out);
- GF_VALIDATE_OR_GOTO (this->name, frame, out);
- GF_VALIDATE_OR_GOTO (this->name, this->private, out);
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_REMOVEXATTR,
- EC_FLAG_UPDATE_LOC_INODE, target, minimum,
- ec_wind_removexattr, ec_manager_xattr,
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_REMOVEXATTR, 0, target,
+ fop_flags, ec_wind_removexattr, ec_manager_xattr,
callback, data);
- if (fop == NULL)
- {
+ if (fop == NULL) {
goto out;
}
- if (loc != NULL)
- {
- if (loc_copy(&fop->loc[0], loc) != 0)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to copy a location.");
+ if (loc != NULL) {
+ if (loc_copy(&fop->loc[0], loc) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL,
+ "Failed to copy a location.");
goto out;
}
}
- if (name != NULL)
- {
+ if (name != NULL) {
fop->str[0] = gf_strdup(name);
- if (fop->str[0] == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to duplicate a string.");
+ if (fop->str[0] == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY,
+ "Failed to duplicate a string.");
goto out;
}
}
- if (xdata != NULL)
- {
- fop->xdata = dict_ref(xdata);
- if (fop->xdata == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (xdata != NULL) {
+ fop->xdata = dict_copy_with_ref(xdata, NULL);
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
@@ -256,23 +331,24 @@ ec_removexattr (call_frame_t *frame, xlator_t *this, uintptr_t target,
out:
if (fop != NULL) {
- ec_manager (fop, error);
+ ec_manager(fop, error);
} else {
- func (frame, NULL, this, -1, error, NULL);
+ func(frame, NULL, this, -1, error, NULL);
}
}
/* FOP: fremovexattr */
-int32_t ec_fremovexattr_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *xdata)
+int32_t
+ec_fremovexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- return ec_inode_write_cbk (frame, this, cookie, op_ret, op_errno,
- NULL, NULL, xdata);
+ return ec_inode_write_cbk(frame, this, cookie, op_ret, op_errno, NULL, NULL,
+ xdata);
}
-void ec_wind_fremovexattr(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+void
+ec_wind_fremovexattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
{
ec_trace("WIND", fop, "idx=%d", idx);
@@ -282,59 +358,54 @@ void ec_wind_fremovexattr(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
}
void
-ec_fremovexattr (call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_fremovexattr_cbk_t func, void *data,
- fd_t *fd, const char *name, dict_t *xdata)
+ec_fremovexattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fremovexattr_cbk_t func, void *data,
+ fd_t *fd, const char *name, dict_t *xdata)
{
- ec_cbk_t callback = { .fremovexattr = func };
- ec_fop_data_t * fop = NULL;
- int32_t error = EIO;
+ ec_cbk_t callback = {.fremovexattr = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
- gf_log("ec", GF_LOG_TRACE, "EC(FREMOVEXATTR) %p", frame);
+ gf_msg_trace("ec", 0, "EC(FREMOVEXATTR) %p", frame);
- VALIDATE_OR_GOTO (this, out);
- GF_VALIDATE_OR_GOTO (this->name, frame, out);
- GF_VALIDATE_OR_GOTO (this->name, this->private, out);
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_FREMOVEXATTR,
- EC_FLAG_UPDATE_FD_INODE, target, minimum,
- ec_wind_fremovexattr, ec_manager_xattr,
- callback, data);
- if (fop == NULL)
- {
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_FREMOVEXATTR, 0, target,
+ fop_flags, ec_wind_fremovexattr,
+ ec_manager_xattr, callback, data);
+ if (fop == NULL) {
goto out;
}
fop->use_fd = 1;
- if (fd != NULL)
- {
+ if (fd != NULL) {
fop->fd = fd_ref(fd);
- if (fop->fd == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "file descriptor.");
+ if (fop->fd == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL,
+ "Failed to reference a "
+ "file descriptor.");
goto out;
}
}
- if (name != NULL)
- {
+ if (name != NULL) {
fop->str[0] = gf_strdup(name);
- if (fop->str[0] == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to duplicate a string.");
+ if (fop->str[0] == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY,
+ "Failed to duplicate a string.");
goto out;
}
}
- if (xdata != NULL)
- {
- fop->xdata = dict_ref(xdata);
- if (fop->xdata == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (xdata != NULL) {
+ fop->xdata = dict_copy_with_ref(xdata, NULL);
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
@@ -344,24 +415,25 @@ ec_fremovexattr (call_frame_t *frame, xlator_t *this, uintptr_t target,
out:
if (fop != NULL) {
- ec_manager (fop, error);
+ ec_manager(fop, error);
} else {
- func (frame, NULL, this, -1, error, NULL);
+ func(frame, NULL, this, -1, error, NULL);
}
}
/* FOP: setattr */
-int32_t ec_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *prestat, struct iatt *poststat,
- dict_t *xdata)
+int32_t
+ec_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prestat,
+ struct iatt *poststat, dict_t *xdata)
{
- return ec_inode_write_cbk (frame, this, cookie, op_ret, op_errno,
- prestat, poststat, xdata);
+ return ec_inode_write_cbk(frame, this, cookie, op_ret, op_errno, prestat,
+ poststat, xdata);
}
-void ec_wind_setattr(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+void
+ec_wind_setattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
{
ec_trace("WIND", fop, "idx=%d", idx);
@@ -370,29 +442,24 @@ void ec_wind_setattr(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
&fop->loc[0], &fop->iatt, fop->int32, fop->xdata);
}
-int32_t ec_manager_setattr(ec_fop_data_t * fop, int32_t state)
+int32_t
+ec_manager_setattr(ec_fop_data_t *fop, int32_t state)
{
- ec_cbk_data_t * cbk;
+ ec_cbk_data_t *cbk;
- switch (state)
- {
+ switch (state) {
case EC_STATE_INIT:
case EC_STATE_LOCK:
- if (fop->fd == NULL)
- {
- ec_lock_prepare_inode(fop, &fop->loc[0], 1);
- }
- else
- {
- ec_lock_prepare_fd(fop, fop->fd, 1);
+ if (fop->fd == NULL) {
+ ec_lock_prepare_inode(fop, &fop->loc[0],
+ EC_UPDATE_META | EC_QUERY_INFO, 0,
+ EC_RANGE_FULL);
+ } else {
+ ec_lock_prepare_fd(fop, fop->fd, EC_UPDATE_META | EC_QUERY_INFO,
+ 0, EC_RANGE_FULL);
}
ec_lock(fop);
- return EC_STATE_GET_SIZE_AND_VERSION;
-
- case EC_STATE_GET_SIZE_AND_VERSION:
- ec_get_size_version(fop);
-
return EC_STATE_DISPATCH;
case EC_STATE_DISPATCH:
@@ -401,34 +468,18 @@ int32_t ec_manager_setattr(ec_fop_data_t * fop, int32_t state)
return EC_STATE_PREPARE_ANSWER;
case EC_STATE_PREPARE_ANSWER:
- cbk = fop->answer;
- if (cbk != NULL)
- {
- if (!ec_dict_combine(cbk, EC_COMBINE_XDATA))
- {
- if (cbk->op_ret >= 0)
- {
- cbk->op_ret = -1;
- cbk->op_errno = EIO;
- }
- }
- if (cbk->op_ret < 0)
- {
- ec_fop_set_error(fop, cbk->op_errno);
- }
- else
- {
- ec_iatt_rebuild(fop->xl->private, cbk->iatt, 2,
- cbk->count);
-
- cbk->iatt[0].ia_size = fop->pre_size;
- cbk->iatt[1].ia_size = fop->pre_size;
+ cbk = ec_fop_prepare_answer(fop, _gf_false);
+ if (cbk != NULL) {
+ if (cbk->iatt[0].ia_type == IA_IFREG) {
+ ec_iatt_rebuild(fop->xl->private, cbk->iatt, 2, cbk->count);
+
+ /* This shouldn't fail because we have the inode locked. */
+ GF_ASSERT(ec_get_inode_size(fop,
+ fop->locks[0].lock->loc.inode,
+ &cbk->iatt[0].ia_size));
+ cbk->iatt[1].ia_size = cbk->iatt[0].ia_size;
}
}
- else
- {
- ec_fop_set_error(fop, EIO);
- }
return EC_STATE_REPORT;
@@ -437,24 +488,17 @@ int32_t ec_manager_setattr(ec_fop_data_t * fop, int32_t state)
GF_ASSERT(cbk != NULL);
- if (fop->id == GF_FOP_SETATTR)
- {
- if (fop->cbks.setattr != NULL)
- {
- fop->cbks.setattr(fop->req_frame, fop, fop->xl,
- cbk->op_ret, cbk->op_errno,
- &cbk->iatt[0], &cbk->iatt[1],
- cbk->xdata);
+ if (fop->id == GF_FOP_SETATTR) {
+ if (fop->cbks.setattr != NULL) {
+ QUORUM_CBK(fop->cbks.setattr, fop, fop->req_frame, fop,
+ fop->xl, cbk->op_ret, cbk->op_errno,
+ &cbk->iatt[0], &cbk->iatt[1], cbk->xdata);
}
- }
- else
- {
- if (fop->cbks.fsetattr != NULL)
- {
- fop->cbks.fsetattr(fop->req_frame, fop, fop->xl,
- cbk->op_ret, cbk->op_errno,
- &cbk->iatt[0], &cbk->iatt[1],
- cbk->xdata);
+ } else {
+ if (fop->cbks.fsetattr != NULL) {
+ QUORUM_CBK(fop->cbks.fsetattr, fop, fop->req_frame, fop,
+ fop->xl, cbk->op_ret, cbk->op_errno,
+ &cbk->iatt[0], &cbk->iatt[1], cbk->xdata);
}
}
@@ -462,24 +506,18 @@ int32_t ec_manager_setattr(ec_fop_data_t * fop, int32_t state)
case -EC_STATE_INIT:
case -EC_STATE_LOCK:
- case -EC_STATE_GET_SIZE_AND_VERSION:
case -EC_STATE_DISPATCH:
case -EC_STATE_PREPARE_ANSWER:
case -EC_STATE_REPORT:
GF_ASSERT(fop->error != 0);
- if (fop->id == GF_FOP_SETATTR)
- {
- if (fop->cbks.setattr != NULL)
- {
+ if (fop->id == GF_FOP_SETATTR) {
+ if (fop->cbks.setattr != NULL) {
fop->cbks.setattr(fop->req_frame, fop, fop->xl, -1,
fop->error, NULL, NULL, NULL);
}
- }
- else
- {
- if (fop->cbks.fsetattr != NULL)
- {
+ } else {
+ if (fop->cbks.fsetattr != NULL) {
fop->cbks.fsetattr(fop->req_frame, fop, fop->xl, -1,
fop->error, NULL, NULL, NULL);
}
@@ -500,59 +538,54 @@ int32_t ec_manager_setattr(ec_fop_data_t * fop, int32_t state)
return EC_STATE_END;
default:
- gf_log(fop->xl->name, GF_LOG_ERROR, "Unhandled state %d for %s",
- state, ec_fop_name(fop->id));
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
return EC_STATE_END;
}
}
-void ec_setattr(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_setattr_cbk_t func, void * data,
- loc_t * loc, struct iatt * stbuf, int32_t valid,
- dict_t * xdata)
+void
+ec_setattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_setattr_cbk_t func, void *data, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- ec_cbk_t callback = { .setattr = func };
- ec_fop_data_t * fop = NULL;
- int32_t error = EIO;
+ ec_cbk_t callback = {.setattr = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
- gf_log("ec", GF_LOG_TRACE, "EC(SETATTR) %p", frame);
+ gf_msg_trace("ec", 0, "EC(SETATTR) %p", frame);
VALIDATE_OR_GOTO(this, out);
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_SETATTR,
- EC_FLAG_UPDATE_LOC_INODE, target, minimum,
- ec_wind_setattr, ec_manager_setattr, callback,
- data);
- if (fop == NULL)
- {
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_SETATTR, 0, target,
+ fop_flags, ec_wind_setattr, ec_manager_setattr,
+ callback, data);
+ if (fop == NULL) {
goto out;
}
fop->int32 = valid;
- if (loc != NULL)
- {
- if (loc_copy(&fop->loc[0], loc) != 0)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to copy a location.");
+ if (loc != NULL) {
+ if (loc_copy(&fop->loc[0], loc) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL,
+ "Failed to copy a location.");
goto out;
}
}
- if (stbuf != NULL)
- {
+ if (stbuf != NULL) {
fop->iatt = *stbuf;
}
- if (xdata != NULL)
- {
- fop->xdata = dict_ref(xdata);
- if (fop->xdata == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (xdata != NULL) {
+ fop->xdata = dict_copy_with_ref(xdata, NULL);
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
@@ -561,28 +594,26 @@ void ec_setattr(call_frame_t * frame, xlator_t * this, uintptr_t target,
error = 0;
out:
- if (fop != NULL)
- {
+ if (fop != NULL) {
ec_manager(fop, error);
- }
- else
- {
- func(frame, NULL, this, -1, EIO, NULL, NULL, NULL);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, NULL, NULL);
}
}
/* FOP: fsetattr */
-int32_t ec_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *prestat, struct iatt *poststat,
- dict_t *xdata)
+int32_t
+ec_fsetattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prestat,
+ struct iatt *poststat, dict_t *xdata)
{
- return ec_inode_write_cbk (frame, this, cookie, op_ret, op_errno,
- prestat, poststat, xdata);
+ return ec_inode_write_cbk(frame, this, cookie, op_ret, op_errno, prestat,
+ poststat, xdata);
}
-void ec_wind_fsetattr(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+void
+ec_wind_fsetattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
{
ec_trace("WIND", fop, "idx=%d", idx);
@@ -591,26 +622,25 @@ void ec_wind_fsetattr(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
fop->fd, &fop->iatt, fop->int32, fop->xdata);
}
-void ec_fsetattr(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_fsetattr_cbk_t func, void * data,
- fd_t * fd, struct iatt * stbuf, int32_t valid, dict_t * xdata)
+void
+ec_fsetattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fsetattr_cbk_t func, void *data, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- ec_cbk_t callback = { .fsetattr = func };
- ec_fop_data_t * fop = NULL;
- int32_t error = EIO;
+ ec_cbk_t callback = {.fsetattr = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
- gf_log("ec", GF_LOG_TRACE, "EC(FSETATTR) %p", frame);
+ gf_msg_trace("ec", 0, "EC(FSETATTR) %p", frame);
VALIDATE_OR_GOTO(this, out);
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_FSETATTR,
- EC_FLAG_UPDATE_FD_INODE, target, minimum,
- ec_wind_fsetattr, ec_manager_setattr, callback,
- data);
- if (fop == NULL)
- {
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_FSETATTR, 0, target,
+ fop_flags, ec_wind_fsetattr, ec_manager_setattr,
+ callback, data);
+ if (fop == NULL) {
goto out;
}
@@ -618,28 +648,25 @@ void ec_fsetattr(call_frame_t * frame, xlator_t * this, uintptr_t target,
fop->int32 = valid;
- if (fd != NULL)
- {
+ if (fd != NULL) {
fop->fd = fd_ref(fd);
- if (fop->fd == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "file descriptor.");
+ if (fop->fd == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL,
+ "Failed to reference a "
+ "file descriptor.");
goto out;
}
}
- if (stbuf != NULL)
- {
+ if (stbuf != NULL) {
fop->iatt = *stbuf;
}
- if (xdata != NULL)
- {
- fop->xdata = dict_ref(xdata);
- if (fop->xdata == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (xdata != NULL) {
+ fop->xdata = dict_copy_with_ref(xdata, NULL);
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
@@ -648,26 +675,25 @@ void ec_fsetattr(call_frame_t * frame, xlator_t * this, uintptr_t target,
error = 0;
out:
- if (fop != NULL)
- {
+ if (fop != NULL) {
ec_manager(fop, error);
- }
- else
- {
- func(frame, NULL, this, -1, EIO, NULL, NULL, NULL);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, NULL, NULL);
}
}
/* FOP: setxattr */
-int32_t ec_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+int32_t
+ec_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- return ec_inode_write_cbk (frame, this, cookie, op_ret, op_errno,
- NULL, NULL, xdata);
+ return ec_inode_write_cbk(frame, this, cookie, op_ret, op_errno, NULL, NULL,
+ xdata);
}
-void ec_wind_setxattr(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+void
+ec_wind_setxattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
{
ec_trace("WIND", fop, "idx=%d", idx);
@@ -677,58 +703,53 @@ void ec_wind_setxattr(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
}
void
-ec_setxattr (call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_setxattr_cbk_t func, void *data,
- loc_t *loc, dict_t *dict, int32_t flags, dict_t *xdata)
+ec_setxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_setxattr_cbk_t func, void *data, loc_t *loc,
+ dict_t *dict, int32_t flags, dict_t *xdata)
{
- ec_cbk_t callback = { .setxattr = func };
- ec_fop_data_t * fop = NULL;
- int32_t error = EIO;
+ ec_cbk_t callback = {.setxattr = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
- gf_log("ec", GF_LOG_TRACE, "EC(SETXATTR) %p", frame);
+ gf_msg_trace("ec", 0, "EC(SETXATTR) %p", frame);
VALIDATE_OR_GOTO(this, out);
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_SETXATTR,
- EC_FLAG_UPDATE_LOC_INODE, target, minimum,
- ec_wind_setxattr, ec_manager_xattr, callback,
- data);
- if (fop == NULL)
- {
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_SETXATTR, 0, target,
+ fop_flags, ec_wind_setxattr, ec_manager_xattr,
+ callback, data);
+ if (fop == NULL) {
goto out;
}
fop->int32 = flags;
- if (loc != NULL)
- {
- if (loc_copy(&fop->loc[0], loc) != 0)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to copy a location.");
+ if (loc != NULL) {
+ if (loc_copy(&fop->loc[0], loc) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL,
+ "Failed to copy a location.");
goto out;
}
}
- if (dict != NULL)
- {
- fop->dict = dict_ref(dict);
- if (fop->dict == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (dict != NULL) {
+ fop->dict = dict_copy_with_ref(dict, NULL);
+ if (fop->dict == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
}
- if (xdata != NULL)
- {
- fop->xdata = dict_ref(xdata);
- if (fop->xdata == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (xdata != NULL) {
+ fop->xdata = dict_copy_with_ref(xdata, NULL);
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
@@ -738,43 +759,41 @@ ec_setxattr (call_frame_t *frame, xlator_t *this, uintptr_t target,
out:
if (fop != NULL) {
- ec_manager (fop, error);
+ ec_manager(fop, error);
} else {
- func (frame, NULL, this, -1, error, NULL);
+ func(frame, NULL, this, -1, error, NULL);
}
}
/* FOP: fsetxattr */
int32_t
-ec_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+ec_fsetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- ec_fop_data_t * fop = NULL;
- ec_cbk_data_t * cbk = NULL;
+ ec_fop_data_t *fop = NULL;
+ ec_cbk_data_t *cbk = NULL;
int32_t idx = (int32_t)(uintptr_t)cookie;
- VALIDATE_OR_GOTO (this, out);
- GF_VALIDATE_OR_GOTO (this->name, frame, out);
- GF_VALIDATE_OR_GOTO (this->name, frame->local, out);
- GF_VALIDATE_OR_GOTO (this->name, this->private, out);
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
fop = frame->local;
- ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx,
- frame, op_ret, op_errno);
+ ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame,
+ op_ret, op_errno);
cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_FSETXATTR, idx, op_ret,
op_errno);
- if (cbk != NULL)
- {
- if (xdata != NULL)
- {
+ if (cbk != NULL) {
+ if (xdata != NULL) {
cbk->xdata = dict_ref(xdata);
- if (cbk->xdata == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (cbk->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
@@ -784,15 +803,15 @@ ec_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
out:
- if (fop != NULL)
- {
+ if (fop != NULL) {
ec_complete(fop);
}
return 0;
}
-void ec_wind_fsetxattr(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+void
+ec_wind_fsetxattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
{
ec_trace("WIND", fop, "idx=%d", idx);
@@ -802,26 +821,24 @@ void ec_wind_fsetxattr(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
}
void
-ec_fsetxattr (call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_fsetxattr_cbk_t func, void *data,
- fd_t *fd, dict_t *dict, int32_t flags, dict_t *xdata)
+ec_fsetxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fsetxattr_cbk_t func, void *data, fd_t *fd,
+ dict_t *dict, int32_t flags, dict_t *xdata)
{
- ec_cbk_t callback = { .fsetxattr = func };
- ec_fop_data_t * fop = NULL;
- int32_t error = EIO;
+ ec_cbk_t callback = {.fsetxattr = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
- gf_log("ec", GF_LOG_TRACE, "EC(FSETXATTR) %p", frame);
+ gf_msg_trace("ec", 0, "EC(FSETXATTR) %p", frame);
VALIDATE_OR_GOTO(this, out);
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_FSETXATTR,
- EC_FLAG_UPDATE_FD_INODE, target, minimum,
- ec_wind_fsetxattr, ec_manager_xattr,
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_FSETXATTR, 0, target,
+ fop_flags, ec_wind_fsetxattr, ec_manager_xattr,
callback, data);
- if (fop == NULL)
- {
+ if (fop == NULL) {
goto out;
}
@@ -829,35 +846,32 @@ ec_fsetxattr (call_frame_t *frame, xlator_t *this, uintptr_t target,
fop->int32 = flags;
- if (fd != NULL)
- {
+ if (fd != NULL) {
fop->fd = fd_ref(fd);
- if (fop->fd == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "file descriptor.");
+ if (fop->fd == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL,
+ "Failed to reference a "
+ "file descriptor.");
goto out;
}
}
- if (dict != NULL)
- {
- fop->dict = dict_ref(dict);
- if (fop->dict == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (dict != NULL) {
+ fop->dict = dict_copy_with_ref(dict, NULL);
+ if (fop->dict == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
}
- if (xdata != NULL)
- {
- fop->xdata = dict_ref(xdata);
- if (fop->xdata == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (xdata != NULL) {
+ fop->xdata = dict_copy_with_ref(xdata, NULL);
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
@@ -867,110 +881,515 @@ ec_fsetxattr (call_frame_t *frame, xlator_t *this, uintptr_t target,
out:
if (fop != NULL) {
- ec_manager (fop, error);
+ ec_manager(fop, error);
} else {
- func (frame, NULL, this, -1, error, NULL);
+ func(frame, NULL, this, -1, error, NULL);
}
}
-/* FOP: truncate */
+/*********************************************************************
+ *
+ * File Operation : fallocate
+ *
+ *********************************************************************/
-int32_t ec_truncate_write(ec_fop_data_t * fop, uintptr_t mask)
+int32_t
+ec_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- ec_t * ec = fop->xl->private;
- struct iobref * iobref = NULL;
- struct iobuf * iobuf = NULL;
- struct iovec vector;
- int32_t ret = 0;
+ return ec_inode_write_cbk(frame, this, cookie, op_ret, op_errno, prebuf,
+ postbuf, xdata);
+}
- iobref = iobref_new();
- if (iobref == NULL)
- {
- goto out;
- }
- iobuf = iobuf_get(fop->xl->ctx->iobuf_pool);
- if (iobuf == NULL)
- {
- goto out;
+void
+ec_wind_fallocate(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_fallocate_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->fallocate,
+ fop->fd, fop->int32, fop->offset, fop->size, fop->xdata);
+}
+
+int32_t
+ec_manager_fallocate(ec_fop_data_t *fop, int32_t state)
+{
+ ec_cbk_data_t *cbk = NULL;
+
+ switch (state) {
+ case EC_STATE_INIT:
+ if (fop->size == 0) {
+ ec_fop_set_error(fop, EINVAL);
+ return EC_STATE_REPORT;
+ }
+ if (fop->int32 &
+ (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE |
+ FALLOC_FL_ZERO_RANGE | FALLOC_FL_PUNCH_HOLE)) {
+ ec_fop_set_error(fop, ENOTSUP);
+ return EC_STATE_REPORT;
+ }
+ fop->user_size = fop->offset + fop->size;
+ fop->head = ec_adjust_offset_down(fop->xl->private, &fop->offset,
+ _gf_true);
+ fop->size += fop->head;
+ ec_adjust_size_up(fop->xl->private, &fop->size, _gf_true);
+
+ /* Fall through */
+
+ case EC_STATE_LOCK:
+ ec_lock_prepare_fd(fop, fop->fd,
+ EC_UPDATE_DATA | EC_UPDATE_META | EC_QUERY_INFO,
+ fop->offset, fop->size);
+ ec_lock(fop);
+
+ return EC_STATE_DISPATCH;
+
+ case EC_STATE_DISPATCH:
+
+ ec_dispatch_all(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ cbk = ec_fop_prepare_answer(fop, _gf_false);
+ if (cbk != NULL) {
+ ec_iatt_rebuild(fop->xl->private, cbk->iatt, 2, cbk->count);
+
+ /* This shouldn't fail because we have the inode locked. */
+ LOCK(&fop->locks[0].lock->loc.inode->lock);
+ {
+ GF_ASSERT(__ec_get_inode_size(fop,
+ fop->locks[0].lock->loc.inode,
+ &cbk->iatt[0].ia_size));
+
+ /*If mode has FALLOC_FL_KEEP_SIZE keep the size */
+ if (fop->int32 & FALLOC_FL_KEEP_SIZE) {
+ cbk->iatt[1].ia_size = cbk->iatt[0].ia_size;
+ } else if (fop->user_size > cbk->iatt[0].ia_size) {
+ cbk->iatt[1].ia_size = fop->user_size;
+
+ /* This shouldn't fail because we have the inode
+ * locked. */
+ GF_ASSERT(__ec_set_inode_size(
+ fop, fop->locks[0].lock->loc.inode,
+ cbk->iatt[1].ia_size));
+ } else {
+ cbk->iatt[1].ia_size = cbk->iatt[0].ia_size;
+ }
+ }
+ UNLOCK(&fop->locks[0].lock->loc.inode->lock);
+ }
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->cbks.fallocate != NULL) {
+ QUORUM_CBK(fop->cbks.fallocate, fop, fop->req_frame, fop,
+ fop->xl, cbk->op_ret, cbk->op_errno, &cbk->iatt[0],
+ &cbk->iatt[1], cbk->xdata);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_INIT:
+ case -EC_STATE_LOCK:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_REPORT:
+ GF_ASSERT(fop->error != 0);
+
+ if (fop->cbks.fallocate != NULL) {
+ fop->cbks.fallocate(fop->req_frame, fop, fop->xl, -1,
+ fop->error, NULL, NULL, NULL);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_LOCK_REUSE:
+ case EC_STATE_LOCK_REUSE:
+ ec_lock_reuse(fop);
+
+ return EC_STATE_UNLOCK;
+
+ case -EC_STATE_UNLOCK:
+ case EC_STATE_UNLOCK:
+ ec_unlock(fop);
+
+ return EC_STATE_END;
+
+ default:
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
}
- if (iobref_add(iobref, iobuf) != 0)
- {
+}
+
+void
+ec_fallocate(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fallocate_cbk_t func, void *data, fd_t *fd,
+ int32_t mode, off_t offset, size_t len, dict_t *xdata)
+{
+ ec_cbk_t callback = {.fallocate = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
+
+ gf_msg_trace("ec", 0, "EC(FALLOCATE) %p", frame);
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_FALLOCATE, 0, target,
+ fop_flags, ec_wind_fallocate,
+ ec_manager_fallocate, callback, data);
+ if (fop == NULL) {
goto out;
}
- vector.iov_base = iobuf->ptr;
- vector.iov_len = fop->offset * ec->fragments - fop->user_size;
- memset(vector.iov_base, 0, vector.iov_len);
+ fop->use_fd = 1;
+ fop->int32 = mode;
+ fop->offset = offset;
+ fop->size = len;
- iobuf_unref (iobuf);
- iobuf = NULL;
+ if (fd != NULL) {
+ fop->fd = fd_ref(fd);
+ if (fop->fd == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL,
+ "Failed to reference a "
+ "file descriptor.");
+ goto out;
+ }
+ }
- ec_writev(fop->frame, fop->xl, mask, fop->minimum, NULL, NULL, fop->fd,
- &vector, 1, fop->user_size, 0, iobref, NULL);
+ if (xdata != NULL) {
+ fop->xdata = dict_ref(xdata);
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+ goto out;
+ }
+ }
- ret = 1;
+ error = 0;
out:
- if (iobuf != NULL)
- {
- iobuf_unref(iobuf);
+ if (fop != NULL) {
+ ec_manager(fop, error);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, NULL, NULL);
}
- if (iobref != NULL)
- {
- iobref_unref(iobref);
+}
+
+/*********************************************************************
+ *
+ * File Operation : Discard
+ *
+ *********************************************************************/
+void
+ec_update_discard_write(ec_fop_data_t *fop, uintptr_t mask)
+{
+ ec_t *ec = fop->xl->private;
+ off_t off_head = 0;
+ off_t off_tail = 0;
+ uint64_t size_head = 0;
+ uint64_t size_tail = 0;
+ int error = 0;
+
+ off_head = fop->offset * ec->fragments - fop->int32;
+ if (fop->size == 0) {
+ error = ec_update_write(fop, mask, off_head, fop->user_size);
+ } else {
+ size_head = fop->int32;
+ size_tail = (off_head + fop->user_size) % ec->stripe_size;
+ off_tail = off_head + fop->user_size - size_tail;
+ if (size_head) {
+ error = ec_update_write(fop, mask, off_head, size_head);
+ if (error) {
+ goto out;
+ }
+ }
+ if (size_tail) {
+ error = ec_update_write(fop, mask, off_tail, size_tail);
+ }
}
+out:
+ if (error)
+ ec_fop_set_error(fop, -error);
+}
+
+void
+ec_discard_adjust_offset_size(ec_fop_data_t *fop)
+{
+ ec_t *ec = fop->xl->private;
- return ret;
+ fop->user_size = fop->size;
+ /* If discard length covers at least a fragment on brick, we will
+ * perform discard operation(when fop->size is non-zero) else we just
+ * write zeros.
+ */
+ fop->int32 = ec_adjust_offset_up(ec, &fop->offset, _gf_true);
+ fop->frag_range.first = fop->offset;
+ if (fop->size < fop->int32) {
+ fop->size = 0;
+ } else {
+ fop->size -= fop->int32;
+ ec_adjust_size_down(ec, &fop->size, _gf_true);
+ }
+ fop->frag_range.last = fop->offset + fop->size;
}
-int32_t ec_truncate_open_cbk(call_frame_t * frame, void * cookie,
- xlator_t * this, int32_t op_ret, int32_t op_errno,
- fd_t * fd, dict_t * xdata)
+int32_t
+ec_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- ec_fop_data_t * fop = cookie;
+ return ec_inode_write_cbk(frame, this, cookie, op_ret, op_errno, prebuf,
+ postbuf, xdata);
+}
- if (op_ret >= 0)
- {
- if (!ec_truncate_write(fop->parent, fop->answer->mask))
- {
- fop->error = EIO;
+void
+ec_wind_discard(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_discard_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->discard,
+ fop->fd, fop->offset, fop->size, fop->xdata);
+}
+
+int32_t
+ec_manager_discard(ec_fop_data_t *fop, int32_t state)
+{
+ ec_cbk_data_t *cbk = NULL;
+ off_t fl_start = 0;
+ uint64_t fl_size = 0;
+
+ switch (state) {
+ case EC_STATE_INIT:
+ if ((fop->size <= 0) || (fop->offset < 0)) {
+ ec_fop_set_error(fop, EINVAL);
+ return EC_STATE_REPORT;
+ }
+ /* Because of the head/tail writes, "discard" happens on the
+ * remaining regions, but we need to compute region including
+ * head/tail writes so compute them separately*/
+ fl_start = fop->offset;
+ fl_size = fop->size;
+ fl_size += ec_adjust_offset_down(fop->xl->private, &fl_start,
+ _gf_true);
+ ec_adjust_size_up(fop->xl->private, &fl_size, _gf_true);
+
+ ec_discard_adjust_offset_size(fop);
+
+ /* Fall through */
+
+ case EC_STATE_LOCK:
+ ec_lock_prepare_fd(fop, fop->fd,
+ EC_UPDATE_DATA | EC_UPDATE_META | EC_QUERY_INFO,
+ fl_start, fl_size);
+ ec_lock(fop);
+
+ return EC_STATE_DISPATCH;
+
+ case EC_STATE_DISPATCH:
+
+ /* Dispatch discard fop only if we have whole fragment
+ * to deallocate */
+ if (fop->size) {
+ ec_dispatch_all(fop);
+ return EC_STATE_DELAYED_START;
+ } else {
+ /* Assume discard to have succeeded on all bricks */
+ ec_succeed_all(fop);
+ }
+
+ /* Fall through */
+
+ case EC_STATE_DELAYED_START:
+
+ if (fop->size) {
+ if (fop->answer && fop->answer->op_ret == 0)
+ ec_update_discard_write(fop, fop->answer->mask);
+ } else {
+ ec_update_discard_write(fop, fop->mask);
+ }
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ cbk = ec_fop_prepare_answer(fop, _gf_false);
+ if (cbk != NULL) {
+ ec_iatt_rebuild(fop->xl->private, cbk->iatt, 2, cbk->count);
+
+ /* This shouldn't fail because we have the inode locked. */
+ GF_ASSERT(ec_get_inode_size(fop, fop->locks[0].lock->loc.inode,
+ &cbk->iatt[0].ia_size));
+
+ cbk->iatt[1].ia_size = cbk->iatt[0].ia_size;
+ }
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->cbks.discard != NULL) {
+ QUORUM_CBK(fop->cbks.discard, fop, fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno, &cbk->iatt[0],
+ &cbk->iatt[1], cbk->xdata);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_INIT:
+ case -EC_STATE_LOCK:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_DELAYED_START:
+ case -EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_REPORT:
+ GF_ASSERT(fop->error != 0);
+
+ if (fop->cbks.discard != NULL) {
+ fop->cbks.discard(fop->req_frame, fop, fop->xl, -1, fop->error,
+ NULL, NULL, NULL);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_LOCK_REUSE:
+ case EC_STATE_LOCK_REUSE:
+ ec_lock_reuse(fop);
+
+ return EC_STATE_UNLOCK;
+
+ case -EC_STATE_UNLOCK:
+ case EC_STATE_UNLOCK:
+ ec_unlock(fop);
+
+ return EC_STATE_END;
+
+ default:
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+void
+ec_discard(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_discard_cbk_t func, void *data, fd_t *fd,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ ec_cbk_t callback = {.discard = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
+
+ gf_msg_trace("ec", 0, "EC(DISCARD) %p", frame);
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_DISCARD, 0, target,
+ fop_flags, ec_wind_discard, ec_manager_discard,
+ callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+
+ fop->use_fd = 1;
+ fop->offset = offset;
+ fop->size = len;
+
+ if (fd != NULL) {
+ fop->fd = fd_ref(fd);
+ }
+
+ if (xdata != NULL) {
+ fop->xdata = dict_ref(xdata);
+ }
+
+ error = 0;
+
+out:
+ if (fop != NULL) {
+ ec_manager(fop, error);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, NULL, NULL);
+ }
+}
+
+/*********************************************************************
+ *
+ * File Operation : truncate
+ *
+ *********************************************************************/
+
+int32_t
+ec_update_truncate_write(ec_fop_data_t *fop, uintptr_t mask)
+{
+ ec_t *ec = fop->xl->private;
+ uint64_t size = fop->offset * ec->fragments - fop->user_size;
+ return ec_update_write(fop, mask, fop->user_size, size);
+}
+
+int32_t
+ec_truncate_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ ec_fop_data_t *fop = cookie;
+ int32_t err;
+
+ fop->parent->good &= fop->good;
+ if (op_ret >= 0) {
+ fd_bind(fd);
+ err = ec_update_truncate_write(fop->parent, fop->answer->mask);
+ if (err != 0) {
+ ec_fop_set_error(fop->parent, -err);
}
}
return 0;
}
-int32_t ec_truncate_clean(ec_fop_data_t * fop)
+int32_t
+ec_truncate_clean(ec_fop_data_t *fop)
{
- if (fop->fd == NULL)
- {
+ if (fop->fd == NULL) {
fop->fd = fd_create(fop->loc[0].inode, fop->frame->root->pid);
- if (fop->fd == NULL)
- {
- return 0;
+ if (fop->fd == NULL) {
+ return -ENOMEM;
}
ec_open(fop->frame, fop->xl, fop->answer->mask, fop->minimum,
- ec_truncate_open_cbk, fop, &fop->loc[0], O_RDWR, fop->fd,
- NULL);
+ ec_truncate_open_cbk, fop, &fop->loc[0], O_RDWR, fop->fd, NULL);
- return 1;
- }
- else
- {
- return ec_truncate_write(fop, fop->answer->mask);
+ return 0;
+ } else {
+ return ec_update_truncate_write(fop, fop->answer->mask);
}
}
-int32_t ec_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prestat,
- struct iatt *poststat, dict_t *xdata)
+int32_t
+ec_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prestat,
+ struct iatt *poststat, dict_t *xdata)
{
- return ec_inode_write_cbk (frame, this, cookie, op_ret, op_errno,
- prestat, poststat, xdata);
+ return ec_inode_write_cbk(frame, this, cookie, op_ret, op_errno, prestat,
+ poststat, xdata);
}
-void ec_wind_truncate(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+void
+ec_wind_truncate(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
{
ec_trace("WIND", fop, "idx=%d", idx);
@@ -979,34 +1398,38 @@ void ec_wind_truncate(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
&fop->loc[0], fop->offset, fop->xdata);
}
-int32_t ec_manager_truncate(ec_fop_data_t * fop, int32_t state)
+int32_t
+ec_manager_truncate(ec_fop_data_t *fop, int32_t state)
{
- ec_cbk_data_t * cbk;
+ ec_cbk_data_t *cbk;
+ off_t offset_down;
- switch (state)
- {
+ switch (state) {
case EC_STATE_INIT:
fop->user_size = fop->offset;
- fop->offset = ec_adjust_size(fop->xl->private, fop->offset, 1);
+ ec_adjust_offset_up(fop->xl->private, &fop->offset, _gf_true);
+ fop->frag_range.first = fop->offset;
+ fop->frag_range.last = UINT64_MAX;
- /* Fall through */
+ /* Fall through */
case EC_STATE_LOCK:
- if (fop->id == GF_FOP_TRUNCATE)
- {
- ec_lock_prepare_inode(fop, &fop->loc[0], 1);
- }
- else
- {
- ec_lock_prepare_fd(fop, fop->fd, 1);
+ offset_down = fop->user_size;
+ ec_adjust_offset_down(fop->xl->private, &offset_down, _gf_true);
+
+ if (fop->id == GF_FOP_TRUNCATE) {
+ ec_lock_prepare_inode(
+ fop, &fop->loc[0],
+ EC_UPDATE_DATA | EC_UPDATE_META | EC_QUERY_INFO,
+ offset_down, EC_RANGE_FULL);
+ } else {
+ ec_lock_prepare_fd(
+ fop, fop->fd,
+ EC_UPDATE_DATA | EC_UPDATE_META | EC_QUERY_INFO,
+ offset_down, EC_RANGE_FULL);
}
ec_lock(fop);
- return EC_STATE_GET_SIZE_AND_VERSION;
-
- case EC_STATE_GET_SIZE_AND_VERSION:
- ec_prepare_update(fop);
-
return EC_STATE_DISPATCH;
case EC_STATE_DISPATCH:
@@ -1015,43 +1438,30 @@ int32_t ec_manager_truncate(ec_fop_data_t * fop, int32_t state)
return EC_STATE_PREPARE_ANSWER;
case EC_STATE_PREPARE_ANSWER:
- cbk = fop->answer;
- if (cbk != NULL)
- {
- if (!ec_dict_combine(cbk, EC_COMBINE_XDATA))
- {
- if (cbk->op_ret >= 0)
- {
- cbk->op_ret = -1;
- cbk->op_errno = EIO;
- }
- }
- if (cbk->op_ret < 0)
- {
- ec_fop_set_error(fop, cbk->op_errno);
- }
- else
- {
- ec_iatt_rebuild(fop->xl->private, cbk->iatt, 2,
- cbk->count);
-
- cbk->iatt[0].ia_size = fop->pre_size;
- cbk->iatt[1].ia_size = fop->user_size;
- fop->post_size = fop->user_size;
- if ((fop->pre_size > fop->post_size) &&
- (fop->user_size != fop->offset))
- {
- if (!ec_truncate_clean(fop))
- {
- ec_fop_set_error(fop, EIO);
- }
+ cbk = ec_fop_prepare_answer(fop, _gf_false);
+ if (cbk != NULL) {
+ int32_t err;
+
+ ec_iatt_rebuild(fop->xl->private, cbk->iatt, 2, cbk->count);
+
+ /* This shouldn't fail because we have the inode locked. */
+ /* Inode size doesn't need to be updated under locks, because
+ * conflicting operations won't be in-flight
+ */
+ GF_ASSERT(ec_get_inode_size(fop, fop->locks[0].lock->loc.inode,
+ &cbk->iatt[0].ia_size));
+ cbk->iatt[1].ia_size = fop->user_size;
+ /* This shouldn't fail because we have the inode locked. */
+ GF_ASSERT(ec_set_inode_size(fop, fop->locks[0].lock->loc.inode,
+ fop->user_size));
+ if ((cbk->iatt[0].ia_size > cbk->iatt[1].ia_size) &&
+ (fop->user_size != fop->offset)) {
+ err = ec_truncate_clean(fop);
+ if (err != 0) {
+ ec_cbk_set_error(cbk, -err, _gf_false);
}
}
}
- else
- {
- ec_fop_set_error(fop, EIO);
- }
return EC_STATE_REPORT;
@@ -1060,24 +1470,17 @@ int32_t ec_manager_truncate(ec_fop_data_t * fop, int32_t state)
GF_ASSERT(cbk != NULL);
- if (fop->id == GF_FOP_TRUNCATE)
- {
- if (fop->cbks.truncate != NULL)
- {
- fop->cbks.truncate(fop->req_frame, fop, fop->xl,
- cbk->op_ret, cbk->op_errno,
- &cbk->iatt[0], &cbk->iatt[1],
- cbk->xdata);
+ if (fop->id == GF_FOP_TRUNCATE) {
+ if (fop->cbks.truncate != NULL) {
+ QUORUM_CBK(fop->cbks.truncate, fop, fop->req_frame, fop,
+ fop->xl, cbk->op_ret, cbk->op_errno,
+ &cbk->iatt[0], &cbk->iatt[1], cbk->xdata);
}
- }
- else
- {
- if (fop->cbks.ftruncate != NULL)
- {
- fop->cbks.ftruncate(fop->req_frame, fop, fop->xl,
- cbk->op_ret, cbk->op_errno,
- &cbk->iatt[0], &cbk->iatt[1],
- cbk->xdata);
+ } else {
+ if (fop->cbks.ftruncate != NULL) {
+ QUORUM_CBK(fop->cbks.ftruncate, fop, fop->req_frame, fop,
+ fop->xl, cbk->op_ret, cbk->op_errno,
+ &cbk->iatt[0], &cbk->iatt[1], cbk->xdata);
}
}
@@ -1085,24 +1488,18 @@ int32_t ec_manager_truncate(ec_fop_data_t * fop, int32_t state)
case -EC_STATE_INIT:
case -EC_STATE_LOCK:
- case -EC_STATE_GET_SIZE_AND_VERSION:
case -EC_STATE_DISPATCH:
case -EC_STATE_PREPARE_ANSWER:
case -EC_STATE_REPORT:
GF_ASSERT(fop->error != 0);
- if (fop->id == GF_FOP_TRUNCATE)
- {
- if (fop->cbks.truncate != NULL)
- {
+ if (fop->id == GF_FOP_TRUNCATE) {
+ if (fop->cbks.truncate != NULL) {
fop->cbks.truncate(fop->req_frame, fop, fop->xl, -1,
fop->error, NULL, NULL, NULL);
}
- }
- else
- {
- if (fop->cbks.ftruncate != NULL)
- {
+ } else {
+ if (fop->cbks.ftruncate != NULL) {
fop->cbks.ftruncate(fop->req_frame, fop, fop->xl, -1,
fop->error, NULL, NULL, NULL);
}
@@ -1123,54 +1520,51 @@ int32_t ec_manager_truncate(ec_fop_data_t * fop, int32_t state)
return EC_STATE_END;
default:
- gf_log(fop->xl->name, GF_LOG_ERROR, "Unhandled state %d for %s",
- state, ec_fop_name(fop->id));
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
return EC_STATE_END;
}
}
-void ec_truncate(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_truncate_cbk_t func, void * data,
- loc_t * loc, off_t offset, dict_t * xdata)
+void
+ec_truncate(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_truncate_cbk_t func, void *data, loc_t *loc,
+ off_t offset, dict_t *xdata)
{
- ec_cbk_t callback = { .truncate = func };
- ec_fop_data_t * fop = NULL;
- int32_t error = EIO;
+ ec_cbk_t callback = {.truncate = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
- gf_log("ec", GF_LOG_TRACE, "EC(TRUNCATE) %p", frame);
+ gf_msg_trace("ec", 0, "EC(TRUNCATE) %p", frame);
VALIDATE_OR_GOTO(this, out);
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_TRUNCATE,
- EC_FLAG_UPDATE_LOC_INODE, target, minimum,
- ec_wind_truncate, ec_manager_truncate, callback,
- data);
- if (fop == NULL)
- {
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_TRUNCATE, 0, target,
+ fop_flags, ec_wind_truncate, ec_manager_truncate,
+ callback, data);
+ if (fop == NULL) {
goto out;
}
fop->offset = offset;
- if (loc != NULL)
- {
- if (loc_copy(&fop->loc[0], loc) != 0)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to copy a location.");
+ if (loc != NULL) {
+ if (loc_copy(&fop->loc[0], loc) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL,
+ "Failed to copy a location.");
goto out;
}
}
- if (xdata != NULL)
- {
- fop->xdata = dict_ref(xdata);
- if (fop->xdata == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (xdata != NULL) {
+ fop->xdata = dict_copy_with_ref(xdata, NULL);
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
@@ -1179,28 +1573,26 @@ void ec_truncate(call_frame_t * frame, xlator_t * this, uintptr_t target,
error = 0;
out:
- if (fop != NULL)
- {
+ if (fop != NULL) {
ec_manager(fop, error);
- }
- else
- {
- func(frame, NULL, this, -1, EIO, NULL, NULL, NULL);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, NULL, NULL);
}
}
/* FOP: ftruncate */
-int32_t ec_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *prestat, struct iatt *poststat,
- dict_t *xdata)
+int32_t
+ec_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prestat,
+ struct iatt *poststat, dict_t *xdata)
{
- return ec_inode_write_cbk (frame, this, cookie, op_ret, op_errno,
- prestat, poststat, xdata);
+ return ec_inode_write_cbk(frame, this, cookie, op_ret, op_errno, prestat,
+ poststat, xdata);
}
-void ec_wind_ftruncate(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+void
+ec_wind_ftruncate(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
{
ec_trace("WIND", fop, "idx=%d", idx);
@@ -1209,26 +1601,25 @@ void ec_wind_ftruncate(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
fop->fd, fop->offset, fop->xdata);
}
-void ec_ftruncate(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_ftruncate_cbk_t func, void * data,
- fd_t * fd, off_t offset, dict_t * xdata)
+void
+ec_ftruncate(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_ftruncate_cbk_t func, void *data, fd_t *fd,
+ off_t offset, dict_t *xdata)
{
- ec_cbk_t callback = { .ftruncate = func };
- ec_fop_data_t * fop = NULL;
- int32_t error = EIO;
+ ec_cbk_t callback = {.ftruncate = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
- gf_log("ec", GF_LOG_TRACE, "EC(FTRUNCATE) %p", frame);
+ gf_msg_trace("ec", 0, "EC(FTRUNCATE) %p", frame);
VALIDATE_OR_GOTO(this, out);
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_FTRUNCATE,
- EC_FLAG_UPDATE_FD_INODE, target, minimum,
- ec_wind_ftruncate, ec_manager_truncate,
- callback, data);
- if (fop == NULL)
- {
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_FTRUNCATE, 0, target,
+ fop_flags, ec_wind_ftruncate,
+ ec_manager_truncate, callback, data);
+ if (fop == NULL) {
goto out;
}
@@ -1236,24 +1627,22 @@ void ec_ftruncate(call_frame_t * frame, xlator_t * this, uintptr_t target,
fop->offset = offset;
- if (fd != NULL)
- {
+ if (fd != NULL) {
fop->fd = fd_ref(fd);
- if (fop->fd == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "file descriptor.");
+ if (fop->fd == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL,
+ "Failed to reference a "
+ "file descriptor.");
goto out;
}
}
- if (xdata != NULL)
- {
- fop->xdata = dict_ref(xdata);
- if (fop->xdata == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (xdata != NULL) {
+ fop->xdata = dict_copy_with_ref(xdata, NULL);
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
@@ -1262,35 +1651,100 @@ void ec_ftruncate(call_frame_t * frame, xlator_t * this, uintptr_t target,
error = 0;
out:
- if (fop != NULL)
- {
+ if (fop != NULL) {
ec_manager(fop, error);
- }
- else
- {
- func(frame, NULL, this, -1, EIO, NULL, NULL, NULL);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, NULL, NULL);
}
}
/* FOP: writev */
+static ec_stripe_t *
+ec_allocate_stripe(ec_t *ec, ec_stripe_list_t *stripe_cache)
+{
+ ec_stripe_t *stripe = NULL;
-int32_t ec_writev_merge_tail(call_frame_t * frame, void * cookie,
- xlator_t * this, int32_t op_ret, int32_t op_errno,
- struct iovec * vector, int32_t count,
- struct iatt * stbuf, struct iobref * iobref,
- dict_t * xdata)
+ if (stripe_cache->count >= stripe_cache->max) {
+ GF_ASSERT(!list_empty(&stripe_cache->lru));
+ stripe = list_first_entry(&stripe_cache->lru, ec_stripe_t, lru);
+ list_move_tail(&stripe->lru, &stripe_cache->lru);
+ GF_ATOMIC_INC(ec->stats.stripe_cache.evicts);
+ } else {
+ stripe = GF_MALLOC(sizeof(ec_stripe_t) + ec->stripe_size,
+ ec_mt_ec_stripe_t);
+ if (stripe != NULL) {
+ stripe_cache->count++;
+ list_add_tail(&stripe->lru, &stripe_cache->lru);
+ GF_ATOMIC_INC(ec->stats.stripe_cache.allocs);
+ } else {
+ GF_ATOMIC_INC(ec->stats.stripe_cache.errors);
+ }
+ }
+
+ return stripe;
+}
+
+static void
+ec_write_stripe_data(ec_t *ec, ec_fop_data_t *fop, ec_stripe_t *stripe)
{
- ec_t * ec = this->private;
- ec_fop_data_t * fop = frame->local;
- size_t size, base, tmp;
+ off_t base;
- if (op_ret >= 0)
- {
+ base = fop->size - ec->stripe_size;
+ memcpy(stripe->data, fop->vector[0].iov_base + base, ec->stripe_size);
+ stripe->frag_offset = fop->frag_range.last - ec->fragment_size;
+}
+
+static void
+ec_add_stripe_in_cache(ec_t *ec, ec_fop_data_t *fop)
+{
+ ec_inode_t *ctx = NULL;
+ ec_stripe_t *stripe = NULL;
+ ec_stripe_list_t *stripe_cache = NULL;
+ gf_boolean_t failed = _gf_true;
+
+ LOCK(&fop->fd->inode->lock);
+
+ ctx = __ec_inode_get(fop->fd->inode, fop->xl);
+ if (ctx == NULL) {
+ goto out;
+ }
+
+ stripe_cache = &ctx->stripe_cache;
+ if (stripe_cache->max > 0) {
+ stripe = ec_allocate_stripe(ec, stripe_cache);
+ if (stripe == NULL) {
+ goto out;
+ }
+
+ ec_write_stripe_data(ec, fop, stripe);
+ }
+
+ failed = _gf_false;
+
+out:
+ UNLOCK(&fop->fd->inode->lock);
+
+ if (failed) {
+ gf_msg(ec->xl->name, GF_LOG_DEBUG, ENOMEM, EC_MSG_FILE_DESC_REF_FAIL,
+ "Failed to create and add stripe in cache");
+ }
+}
+
+int32_t
+ec_writev_merge_tail(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iovec *vector,
+ int32_t count, struct iatt *stbuf, struct iobref *iobref,
+ dict_t *xdata)
+{
+ ec_t *ec = this->private;
+ ec_fop_data_t *fop = frame->local;
+ uint64_t size, base, tmp;
+
+ if (op_ret >= 0) {
tmp = 0;
size = fop->size - fop->user_size - fop->head;
base = ec->stripe_size - size;
- if (op_ret > base)
- {
+ if (op_ret > base) {
tmp = min(op_ret - base, size);
ec_iov_copy_to(fop->vector[0].iov_base + fop->size - size, vector,
count, base, tmp);
@@ -1298,46 +1752,44 @@ int32_t ec_writev_merge_tail(call_frame_t * frame, void * cookie,
size -= tmp;
}
- if (size > 0)
- {
+ if (size > 0) {
memset(fop->vector[0].iov_base + fop->size - size, 0, size);
}
- }
+ if (ec->stripe_cache) {
+ ec_add_stripe_in_cache(ec, fop);
+ }
+ }
return 0;
}
-int32_t ec_writev_merge_head(call_frame_t * frame, void * cookie,
- xlator_t * this, int32_t op_ret, int32_t op_errno,
- struct iovec * vector, int32_t count,
- struct iatt * stbuf, struct iobref * iobref,
- dict_t * xdata)
+int32_t
+ec_writev_merge_head(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iovec *vector,
+ int32_t count, struct iatt *stbuf, struct iobref *iobref,
+ dict_t *xdata)
{
- ec_t * ec = this->private;
- ec_fop_data_t * fop = frame->local;
- size_t size, base;
+ ec_t *ec = this->private;
+ ec_fop_data_t *fop = frame->local;
+ uint64_t size, base;
- if (op_ret >= 0)
- {
+ if (op_ret >= 0) {
size = fop->head;
base = 0;
- if (op_ret > 0)
- {
+ if (op_ret > 0) {
base = min(op_ret, size);
ec_iov_copy_to(fop->vector[0].iov_base, vector, count, 0, base);
size -= base;
}
- if (size > 0)
- {
+ if (size > 0) {
memset(fop->vector[0].iov_base + base, 0, size);
}
size = fop->size - fop->user_size - fop->head;
- if ((size > 0) && (fop->size == ec->stripe_size))
- {
+ if ((size > 0) && (fop->size == ec->stripe_size)) {
ec_writev_merge_tail(frame, cookie, this, op_ret, op_errno, vector,
count, stbuf, iobref, xdata);
}
@@ -1346,198 +1798,375 @@ int32_t ec_writev_merge_head(call_frame_t * frame, void * cookie,
return 0;
}
-void ec_writev_start(ec_fop_data_t *fop)
+static int
+ec_make_internal_fop_xdata(dict_t **xdata)
{
- ec_t *ec = fop->xl->private;
- struct iobref *iobref = NULL;
- struct iobuf *iobuf = NULL;
- void *ptr = NULL;
- ec_fd_t *ctx;
- fd_t *fd;
- size_t tail;
- uid_t uid;
- gid_t gid;
+ dict_t *dict = NULL;
- fd = fd_anonymous(fop->fd->inode);
- if (fd == NULL) {
- ec_fop_set_error(fop, EIO);
+ if (*xdata)
+ return 0;
- return;
- }
+ dict = dict_new();
+ if (!dict)
+ goto out;
- uid = fop->frame->root->uid;
- fop->frame->root->uid = 0;
- gid = fop->frame->root->gid;
- fop->frame->root->gid = 0;
+ if (dict_set_str(dict, GLUSTERFS_INTERNAL_FOP_KEY, "yes"))
+ goto out;
- ctx = ec_fd_get(fop->fd, fop->xl);
- if (ctx != NULL) {
- if ((ctx->flags & O_APPEND) != 0) {
- fop->offset = fop->pre_size;
+ *xdata = dict;
+ return 0;
+out:
+ if (dict)
+ dict_unref(dict);
+ return -1;
+}
+
+static int32_t
+ec_writev_prepare_buffers(ec_t *ec, ec_fop_data_t *fop)
+{
+ struct iobref *iobref = NULL;
+ struct iovec *iov;
+ void *ptr;
+ int32_t err;
+
+ fop->user_size = iov_length(fop->vector, fop->int32);
+ fop->head = ec_adjust_offset_down(ec, &fop->offset, _gf_false);
+ fop->frag_range.first = fop->offset / ec->fragments;
+ fop->size = fop->user_size + fop->head;
+ ec_adjust_size_up(ec, &fop->size, _gf_false);
+ fop->frag_range.last = fop->frag_range.first + fop->size / ec->fragments;
+
+ if ((fop->int32 != 1) || (fop->head != 0) || (fop->size > fop->user_size) ||
+ !EC_ALIGN_CHECK(fop->vector[0].iov_base, EC_METHOD_WORD_SIZE)) {
+ err = ec_buffer_alloc(ec->xl, fop->size, &iobref, &ptr);
+ if (err != 0) {
+ goto out;
}
+
+ ec_iov_copy_to(ptr + fop->head, fop->vector, fop->int32, 0,
+ fop->user_size);
+
+ fop->vector[0].iov_base = ptr;
+ fop->vector[0].iov_len = fop->size;
+
+ iobref_unref(fop->buffers);
+ fop->buffers = iobref;
}
- fop->user_size = iov_length(fop->vector, fop->int32);
- fop->head = ec_adjust_offset(ec, &fop->offset, 0);
- fop->size = ec_adjust_size(ec, fop->user_size + fop->head, 0);
+ if (fop->int32 != 2) {
+ iov = GF_MALLOC(VECTORSIZE(2), gf_common_mt_iovec);
+ if (iov == NULL) {
+ err = -ENOMEM;
- iobref = iobref_new();
- if (iobref == NULL) {
- goto out;
+ goto out;
+ }
+ iov[0].iov_base = fop->vector[0].iov_base;
+ iov[0].iov_len = fop->vector[0].iov_len;
+
+ GF_FREE(fop->vector);
+ fop->vector = iov;
}
- iobuf = iobuf_get2(fop->xl->ctx->iobuf_pool, fop->size);
- if (iobuf == NULL) {
+
+ fop->vector[1].iov_len = fop->size / ec->fragments;
+ err = ec_buffer_alloc(ec->xl, fop->vector[1].iov_len * ec->nodes,
+ &fop->buffers, &fop->vector[1].iov_base);
+ if (err != 0) {
goto out;
}
- if (iobref_add(iobref, iobuf) != 0) {
- goto out;
+
+ err = 0;
+
+out:
+ return err;
+}
+
+static void
+ec_merge_stripe_head_locked(ec_t *ec, ec_fop_data_t *fop, ec_stripe_t *stripe)
+{
+ uint32_t head, size;
+
+ head = fop->head;
+ memcpy(fop->vector[0].iov_base, stripe->data, head);
+
+ size = ec->stripe_size - head;
+ if (size > fop->user_size) {
+ head += fop->user_size;
+ size = ec->stripe_size - head;
+ memcpy(fop->vector[0].iov_base + head, stripe->data + head, size);
}
+}
- ptr = iobuf->ptr + fop->head;
- ec_iov_copy_to(ptr, fop->vector, fop->int32, 0, fop->user_size);
+static void
+ec_merge_stripe_tail_locked(ec_t *ec, ec_fop_data_t *fop, ec_stripe_t *stripe)
+{
+ uint32_t head, tail;
+ off_t offset;
- fop->vector[0].iov_base = iobuf->ptr;
- fop->vector[0].iov_len = fop->size;
+ offset = fop->user_size + fop->head;
+ tail = fop->size - offset;
+ head = ec->stripe_size - tail;
- iobuf_unref(iobuf);
+ memcpy(fop->vector[0].iov_base + offset, stripe->data + head, tail);
+}
- iobref_unref(fop->buffers);
- fop->buffers = iobref;
+static ec_stripe_t *
+ec_get_stripe_from_cache_locked(ec_t *ec, ec_fop_data_t *fop,
+ uint64_t frag_offset)
+{
+ ec_inode_t *ctx = NULL;
+ ec_stripe_t *stripe = NULL;
+ ec_stripe_list_t *stripe_cache = NULL;
- if (fop->head > 0)
- {
- ec_readv(fop->frame, fop->xl, -1, EC_MINIMUM_MIN, ec_writev_merge_head,
- NULL, fd, ec->stripe_size, fop->offset, 0, NULL);
+ ctx = __ec_inode_get(fop->fd->inode, fop->xl);
+ if (ctx == NULL) {
+ GF_ATOMIC_INC(ec->stats.stripe_cache.errors);
+ return NULL;
}
- tail = fop->size - fop->user_size - fop->head;
- if ((tail > 0) && ((fop->head == 0) || (fop->size > ec->stripe_size)))
+
+ stripe_cache = &ctx->stripe_cache;
+ list_for_each_entry(stripe, &stripe_cache->lru, lru)
{
- if (fop->pre_size > fop->offset + fop->head + fop->user_size)
- {
- ec_readv(fop->frame, fop->xl, -1, EC_MINIMUM_MIN,
- ec_writev_merge_tail, NULL, fd, ec->stripe_size,
- fop->offset + fop->size - ec->stripe_size, 0, NULL);
- }
- else
- {
- memset(fop->vector[0].iov_base + fop->size - tail, 0, tail);
+ if (stripe->frag_offset == frag_offset) {
+ list_move_tail(&stripe->lru, &stripe_cache->lru);
+ GF_ATOMIC_INC(ec->stats.stripe_cache.hits);
+ return stripe;
}
}
- fop->frame->root->uid = uid;
- fop->frame->root->gid = gid;
+ GF_ATOMIC_INC(ec->stats.stripe_cache.misses);
- fd_unref(fd);
+ return NULL;
+}
- return;
+static gf_boolean_t
+ec_get_and_merge_stripe(ec_t *ec, ec_fop_data_t *fop, ec_stripe_part_t which)
+{
+ uint64_t frag_offset;
+ ec_stripe_t *stripe = NULL;
+ gf_boolean_t found = _gf_false;
-out:
- if (iobuf != NULL) {
- iobuf_unref(iobuf);
- }
- if (iobref != NULL) {
- iobref_unref(iobref);
+ if (!ec->stripe_cache) {
+ return found;
}
- fop->frame->root->uid = uid;
- fop->frame->root->gid = gid;
+ LOCK(&fop->fd->inode->lock);
+ if (which == EC_STRIPE_HEAD) {
+ frag_offset = fop->frag_range.first;
+ stripe = ec_get_stripe_from_cache_locked(ec, fop, frag_offset);
+ if (stripe) {
+ ec_merge_stripe_head_locked(ec, fop, stripe);
+ found = _gf_true;
+ }
+ }
- fd_unref(fd);
+ if (which == EC_STRIPE_TAIL) {
+ frag_offset = fop->frag_range.last - ec->fragment_size;
+ stripe = ec_get_stripe_from_cache_locked(ec, fop, frag_offset);
+ if (stripe) {
+ ec_merge_stripe_tail_locked(ec, fop, stripe);
+ found = _gf_true;
+ }
+ }
+ UNLOCK(&fop->fd->inode->lock);
- ec_fop_set_error(fop, EIO);
+ return found;
}
-int32_t ec_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prestat,
- struct iatt *poststat, dict_t *xdata)
+static uintptr_t
+ec_get_lock_good_mask(inode_t *inode, xlator_t *xl)
{
- ec_t *ec = NULL;
- if (this && this->private) {
- ec = this->private;
- if ((op_ret > 0) && ((op_ret % ec->fragment_size) != 0)) {
- op_ret = -1;
- op_errno = EIO;
- }
- }
- return ec_inode_write_cbk (frame, this, cookie, op_ret, op_errno,
- prestat, poststat, xdata);
+ ec_lock_t *lock = NULL;
+ ec_inode_t *ictx = NULL;
+ LOCK(&inode->lock);
+ {
+ ictx = __ec_inode_get(inode, xl);
+ if (ictx)
+ lock = ictx->inode_lock;
+ }
+ UNLOCK(&inode->lock);
+ if (lock)
+ return lock->good_mask;
+ return 0;
}
-void ec_wind_writev(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+void
+ec_writev_start(ec_fop_data_t *fop)
{
- ec_trace("WIND", fop, "idx=%d", idx);
+ ec_t *ec = fop->xl->private;
+ ec_fd_t *ctx;
+ fd_t *fd;
+ dict_t *xdata = NULL;
+ uint64_t tail, current;
+ int32_t err = -ENOMEM;
+ gf_boolean_t found_stripe = _gf_false;
- struct iovec vector[1];
- struct iobref * iobref = NULL;
- struct iobuf * iobuf = NULL;
- ssize_t size = 0, bufsize = 0;
+ /* This shouldn't fail because we have the inode locked. */
+ GF_ASSERT(ec_get_inode_size(fop, fop->fd->inode, &current));
- iobref = iobref_new();
- if (iobref == NULL)
- {
- goto out;
+ fd = fd_anonymous(fop->fd->inode);
+ if (fd == NULL) {
+ goto failed;
}
- size = fop->vector[0].iov_len;
- bufsize = size / ec->fragments;
+ fop->frame->root->uid = 0;
+ fop->frame->root->gid = 0;
- iobuf = iobuf_get2(fop->xl->ctx->iobuf_pool, bufsize);
- if (iobuf == NULL)
- {
- goto out;
+ ctx = ec_fd_get(fop->fd, fop->xl);
+ if (ctx != NULL) {
+ if ((ctx->flags & O_APPEND) != 0) {
+ /* Appending writes take full locks so size won't change because
+ * of any parallel operations
+ */
+ fop->offset = current;
+ }
}
- if (iobref_add(iobref, iobuf) != 0)
- {
- goto out;
+
+ err = ec_writev_prepare_buffers(ec, fop);
+ if (err != 0) {
+ goto failed_fd;
+ }
+ tail = fop->size - fop->user_size - fop->head;
+ if (fop->head > 0) {
+ if (current > fop->offset) {
+ found_stripe = ec_get_and_merge_stripe(ec, fop, EC_STRIPE_HEAD);
+ if (!found_stripe) {
+ if (ec_make_internal_fop_xdata(&xdata)) {
+ err = -ENOMEM;
+ goto failed_xdata;
+ }
+ ec_readv(fop->frame, fop->xl,
+ ec_get_lock_good_mask(fop->fd->inode, fop->xl),
+ EC_MINIMUM_MIN, ec_writev_merge_head, NULL, fd,
+ ec->stripe_size, fop->offset, 0, xdata);
+ }
+ } else {
+ memset(fop->vector[0].iov_base, 0, fop->head);
+ memset(fop->vector[0].iov_base + fop->size - tail, 0, tail);
+ if (ec->stripe_cache && (fop->size <= ec->stripe_size)) {
+ ec_add_stripe_in_cache(ec, fop);
+ }
+ }
+ }
+
+ if ((tail > 0) && ((fop->head == 0) || (fop->size > ec->stripe_size))) {
+ /* Current locking scheme will make sure the 'current' below will
+ * never decrease while the fop is in progress, so the checks will
+ * work as expected
+ */
+ if (current > fop->offset + fop->head + fop->user_size) {
+ found_stripe = ec_get_and_merge_stripe(ec, fop, EC_STRIPE_TAIL);
+ if (!found_stripe) {
+ if (ec_make_internal_fop_xdata(&xdata)) {
+ err = -ENOMEM;
+ goto failed_xdata;
+ }
+ ec_readv(fop->frame, fop->xl,
+ ec_get_lock_good_mask(fop->fd->inode, fop->xl),
+ EC_MINIMUM_MIN, ec_writev_merge_tail, NULL, fd,
+ ec->stripe_size,
+ fop->offset + fop->size - ec->stripe_size, 0, xdata);
+ }
+ } else {
+ memset(fop->vector[0].iov_base + fop->size - tail, 0, tail);
+ if (ec->stripe_cache) {
+ ec_add_stripe_in_cache(ec, fop);
+ }
+ }
}
- ec_method_encode(size, ec->fragments, idx, fop->vector[0].iov_base,
- iobuf->ptr);
+ err = 0;
- vector[0].iov_base = iobuf->ptr;
- vector[0].iov_len = bufsize;
+failed_xdata:
+ if (xdata) {
+ dict_unref(xdata);
+ }
+failed_fd:
+ fd_unref(fd);
+failed:
+ ec_fop_set_error(fop, -err);
+}
- iobuf_unref(iobuf);
+int32_t
+ec_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *prestat, struct iatt *poststat,
+ dict_t *xdata)
+{
+ ec_t *ec = NULL;
+ if (this && this->private) {
+ ec = this->private;
+ if ((op_ret > 0) && ((op_ret % ec->fragment_size) != 0)) {
+ op_ret = -1;
+ op_errno = EIO;
+ }
+ }
+ return ec_inode_write_cbk(frame, this, cookie, op_ret, op_errno, prestat,
+ poststat, xdata);
+}
- STACK_WIND_COOKIE(fop->frame, ec_writev_cbk, (void *)(uintptr_t)idx,
- ec->xl_list[idx], ec->xl_list[idx]->fops->writev,
- fop->fd, vector, 1, fop->offset / ec->fragments,
- fop->uint32, iobref, fop->xdata);
+void
+ec_wind_writev(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
- iobref_unref(iobref);
+ struct iovec vector[1];
+ size_t size;
- return;
+ size = fop->vector[1].iov_len;
-out:
- if (iobuf != NULL)
- {
- iobuf_unref(iobuf);
- }
- if (iobref != NULL)
- {
- iobref_unref(iobref);
- }
+ vector[0].iov_base = fop->vector[1].iov_base + idx * size;
+ vector[0].iov_len = size;
- ec_writev_cbk(fop->frame, (void *)(uintptr_t)idx, fop->xl, -1, EIO, NULL,
- NULL, NULL);
+ STACK_WIND_COOKIE(fop->frame, ec_writev_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->writev, fop->fd,
+ vector, 1, fop->offset / ec->fragments, fop->uint32,
+ fop->buffers, fop->xdata);
}
-int32_t ec_manager_writev(ec_fop_data_t * fop, int32_t state)
+static void
+ec_writev_encode(ec_fop_data_t *fop)
{
- ec_cbk_data_t * cbk;
+ ec_t *ec = fop->xl->private;
+ void *blocks[ec->nodes];
+ uint32_t i;
- switch (state)
- {
+ blocks[0] = fop->vector[1].iov_base;
+ for (i = 1; i < ec->nodes; i++) {
+ blocks[i] = blocks[i - 1] + fop->vector[1].iov_len;
+ }
+ ec_method_encode(&ec->matrix, fop->vector[0].iov_len,
+ fop->vector[0].iov_base, blocks);
+}
+
+int32_t
+ec_manager_writev(ec_fop_data_t *fop, int32_t state)
+{
+ ec_cbk_data_t *cbk;
+ ec_fd_t *ctx = NULL;
+ ec_t *ec = fop->xl->private;
+ off_t fl_start = 0;
+ uint64_t fl_size = LONG_MAX;
+
+ switch (state) {
case EC_STATE_INIT:
case EC_STATE_LOCK:
- ec_lock_prepare_fd(fop, fop->fd, 1);
+ ctx = ec_fd_get(fop->fd, fop->xl);
+ if (ctx != NULL) {
+ if ((ctx->flags & O_APPEND) == 0) {
+ off_t user_size = 0;
+ off_t head = 0;
+
+ fl_start = fop->offset;
+ user_size = iov_length(fop->vector, fop->int32);
+ head = ec_adjust_offset_down(ec, &fl_start, _gf_true);
+ fl_size = user_size + head;
+ ec_adjust_size_up(ec, &fl_size, _gf_true);
+ }
+ }
+ ec_lock_prepare_fd(fop, fop->fd,
+ EC_UPDATE_DATA | EC_UPDATE_META | EC_QUERY_INFO,
+ fl_start, fl_size);
ec_lock(fop);
- return EC_STATE_GET_SIZE_AND_VERSION;
-
- case EC_STATE_GET_SIZE_AND_VERSION:
- ec_prepare_update(fop);
-
return EC_STATE_DISPATCH;
case EC_STATE_DISPATCH:
@@ -1546,62 +2175,60 @@ int32_t ec_manager_writev(ec_fop_data_t * fop, int32_t state)
return EC_STATE_DELAYED_START;
case EC_STATE_DELAYED_START:
+ /* Restore uid, gid if they were changed to do some partial
+ * reads. */
+ fop->frame->root->uid = fop->uid;
+ fop->frame->root->gid = fop->gid;
+
+ ec_writev_encode(fop);
+
ec_dispatch_all(fop);
return EC_STATE_PREPARE_ANSWER;
case EC_STATE_PREPARE_ANSWER:
- cbk = fop->answer;
- if (cbk != NULL)
- {
- if (!ec_dict_combine(cbk, EC_COMBINE_XDATA))
- {
- if (cbk->op_ret >= 0)
- {
- cbk->op_ret = -1;
- cbk->op_errno = EIO;
- }
- }
- if (cbk->op_ret < 0)
- {
- ec_fop_set_error(fop, cbk->op_errno);
- }
- else
- {
- ec_t * ec = fop->xl->private;
- size_t size;
+ cbk = ec_fop_prepare_answer(fop, _gf_false);
+ if (cbk != NULL) {
+ ec_t *ec = fop->xl->private;
+ uint64_t size;
- ec_iatt_rebuild(fop->xl->private, cbk->iatt, 2,
- cbk->count);
+ ec_iatt_rebuild(fop->xl->private, cbk->iatt, 2, cbk->count);
+ /* This shouldn't fail because we have the inode locked. */
+ LOCK(&fop->fd->inode->lock);
+ {
+ GF_ASSERT(__ec_get_inode_size(fop, fop->fd->inode,
+ &cbk->iatt[0].ia_size));
+ cbk->iatt[1].ia_size = cbk->iatt[0].ia_size;
size = fop->offset + fop->head + fop->user_size;
- if (size > fop->pre_size)
- {
- fop->post_size = size;
+ if (size > cbk->iatt[0].ia_size) {
+ /* Only update inode size if this is a top level fop.
+ * Otherwise this is an internal write and the top
+ * level fop should take care of the real inode size.
+ */
+ if (fop->parent == NULL) {
+ /* This shouldn't fail because we have the inode
+ * locked. */
+ GF_ASSERT(
+ __ec_set_inode_size(fop, fop->fd->inode, size));
+ }
+ cbk->iatt[1].ia_size = size;
}
+ }
+ UNLOCK(&fop->fd->inode->lock);
- cbk->iatt[0].ia_size = fop->pre_size;
- cbk->iatt[1].ia_size = fop->post_size;
-
+ if (fop->error == 0) {
cbk->op_ret *= ec->fragments;
- if (cbk->op_ret < fop->head)
- {
+ if (cbk->op_ret < fop->head) {
cbk->op_ret = 0;
- }
- else
- {
+ } else {
cbk->op_ret -= fop->head;
}
- if (cbk->op_ret > fop->user_size)
- {
+ if (cbk->op_ret > fop->user_size) {
cbk->op_ret = fop->user_size;
}
}
}
- else
- {
- ec_fop_set_error(fop, EIO);
- }
return EC_STATE_REPORT;
@@ -1610,25 +2237,30 @@ int32_t ec_manager_writev(ec_fop_data_t * fop, int32_t state)
GF_ASSERT(cbk != NULL);
- if (fop->cbks.writev != NULL)
- {
- fop->cbks.writev(fop->req_frame, fop, fop->xl, cbk->op_ret,
- cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1],
- cbk->xdata);
+ if (fop->cbks.writev != NULL) {
+ QUORUM_CBK(fop->cbks.writev, fop, fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno, &cbk->iatt[0],
+ &cbk->iatt[1], cbk->xdata);
}
return EC_STATE_LOCK_REUSE;
+ case -EC_STATE_DELAYED_START:
+ /* We have failed while doing partial reads. We need to restore
+ * original uid, gid. */
+ fop->frame->root->uid = fop->uid;
+ fop->frame->root->gid = fop->gid;
+
+ /* Fall through */
+
case -EC_STATE_INIT:
case -EC_STATE_LOCK:
- case -EC_STATE_GET_SIZE_AND_VERSION:
case -EC_STATE_DISPATCH:
case -EC_STATE_PREPARE_ANSWER:
case -EC_STATE_REPORT:
GF_ASSERT(fop->error != 0);
- if (fop->cbks.writev != NULL)
- {
+ if (fop->cbks.writev != NULL) {
fop->cbks.writev(fop->req_frame, fop, fop->xl, -1, fop->error,
NULL, NULL, NULL);
}
@@ -1648,34 +2280,33 @@ int32_t ec_manager_writev(ec_fop_data_t * fop, int32_t state)
return EC_STATE_END;
default:
- gf_log(fop->xl->name, GF_LOG_ERROR, "Unhandled state %d for %s",
- state, ec_fop_name(fop->id));
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
return EC_STATE_END;
}
}
-void ec_writev(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_writev_cbk_t func, void * data, fd_t * fd,
- struct iovec * vector, int32_t count, off_t offset,
- uint32_t flags, struct iobref * iobref, dict_t * xdata)
+void
+ec_writev(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_writev_cbk_t func, void *data, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t offset, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata)
{
- ec_cbk_t callback = { .writev = func };
- ec_fop_data_t * fop = NULL;
- int32_t error = EIO;
+ ec_cbk_t callback = {.writev = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
- gf_log("ec", GF_LOG_TRACE, "EC(WRITE) %p", frame);
+ gf_msg_trace("ec", 0, "EC(WRITE) %p", frame);
VALIDATE_OR_GOTO(this, out);
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_WRITE,
- EC_FLAG_UPDATE_FD_INODE, target, minimum,
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_WRITE, 0, target, fop_flags,
ec_wind_writev, ec_manager_writev, callback,
data);
- if (fop == NULL)
- {
+ if (fop == NULL) {
goto out;
}
@@ -1685,47 +2316,43 @@ void ec_writev(call_frame_t * frame, xlator_t * this, uintptr_t target,
fop->use_fd = 1;
- if (fd != NULL)
- {
+ if (fd != NULL) {
fop->fd = fd_ref(fd);
- if (fop->fd == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "file descriptor.");
+ if (fop->fd == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL,
+ "Failed to reference a "
+ "file descriptor.");
goto out;
}
}
- if (count > 0)
- {
+ if (count > 0) {
fop->vector = iov_dup(vector, count);
- if (fop->vector == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to duplicate a "
- "vector list.");
+ if (fop->vector == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY,
+ "Failed to duplicate a "
+ "vector list.");
goto out;
}
fop->int32 = count;
}
- if (iobref != NULL)
- {
+ if (iobref != NULL) {
fop->buffers = iobref_ref(iobref);
- if (fop->buffers == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "buffer.");
+ if (fop->buffers == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_BUF_REF_FAIL,
+ "Failed to reference a "
+ "buffer.");
goto out;
}
}
- if (xdata != NULL)
- {
- fop->xdata = dict_ref(xdata);
- if (fop->xdata == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (xdata != NULL) {
+ fop->xdata = dict_copy_with_ref(xdata, NULL);
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
@@ -1734,12 +2361,9 @@ void ec_writev(call_frame_t * frame, xlator_t * this, uintptr_t target,
error = 0;
out:
- if (fop != NULL)
- {
+ if (fop != NULL) {
ec_manager(fop, error);
- }
- else
- {
- func(frame, NULL, this, -1, EIO, NULL, NULL, NULL);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, NULL, NULL);
}
}
diff --git a/xlators/cluster/ec/src/ec-locks.c b/xlators/cluster/ec/src/ec-locks.c
index 2e301631b3f..601960d6154 100644
--- a/xlators/cluster/ec/src/ec-locks.c
+++ b/xlators/cluster/ec/src/ec-locks.c
@@ -8,71 +8,110 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
-
#include "ec-helpers.h"
#include "ec-common.h"
#include "ec-combine.h"
-#include "ec-method.h"
#include "ec-fops.h"
+#include "ec-messages.h"
#define EC_LOCK_MODE_NONE 0
-#define EC_LOCK_MODE_INC 1
-#define EC_LOCK_MODE_ALL 2
+#define EC_LOCK_MODE_INC 1
+#define EC_LOCK_MODE_ALL 2
-int32_t ec_lock_check(ec_fop_data_t * fop, ec_cbk_data_t ** cbk,
- uintptr_t * mask)
+int32_t
+ec_lock_check(ec_fop_data_t *fop, uintptr_t *mask)
{
- ec_t * ec = fop->xl->private;
- ec_cbk_data_t * ans = NULL;
- uintptr_t locked = 0, notlocked = 0;
+ ec_t *ec = fop->xl->private;
+ ec_cbk_data_t *ans = NULL;
+ ec_cbk_data_t *cbk = NULL;
+ uintptr_t locked = 0;
+ int32_t good = 0;
+ int32_t eagain = 0;
+ int32_t estale = 0;
int32_t error = -1;
+ /* There are some errors that we'll handle in an special way while trying
+ * to acquire a lock.
+ *
+ * EAGAIN: If it's found during a parallel non-blocking lock request, we
+ * consider that there's contention on the inode, so we consider
+ * the acquisition a failure and try again with a sequential
+ * blocking lock request. This will ensure that we get a lock on
+ * as many bricks as possible (ignoring EAGAIN here would cause
+ * unnecessary triggers of self-healing).
+ *
+ * If it's found during a sequential blocking lock request, it's
+ * considered an error. Lock will only succeed if there are
+ * enough other bricks locked.
+ *
+ * ESTALE: This can appear during parallel or sequential lock request if
+ * the inode has just been unlinked. We consider this error is
+ * not recoverable, but we also don't consider it as fatal. So,
+ * if it happens during parallel lock, we won't attempt a
+ * sequential one unless there are EAGAIN errors on other
+ * bricks (and are enough to form a quorum), but if we reach
+ * quorum counting the ESTALE bricks, we consider the whole
+ * result of the operation is ESTALE instead of EIO.
+ */
+
list_for_each_entry(ans, &fop->cbk_list, list)
{
- if (ans->op_ret >= 0)
- {
- if (locked != 0)
- {
+ if (ans->op_ret >= 0) {
+ if (locked != 0) {
error = EIO;
}
locked |= ans->mask;
- *cbk = ans;
- }
- else if (ans->op_errno == EAGAIN)
- {
- notlocked |= ans->mask;
+ good = ans->count;
+ cbk = ans;
+ } else if (ans->op_errno == ESTALE) {
+ estale += ans->count;
+ } else if ((ans->op_errno == EAGAIN) &&
+ (fop->uint32 != EC_LOCK_MODE_INC)) {
+ eagain += ans->count;
}
}
- if (error == -1)
- {
- if (ec_bits_count(locked | notlocked) >= ec->fragments)
- {
- if (notlocked == 0)
- {
- fop->answer = *cbk;
+ if (error == -1) {
+ /* If we have enough quorum with succeeded and EAGAIN answers, we
+ * ignore for now any ESTALE answer. If there are EAGAIN answers,
+ * we retry with a sequential blocking lock request if needed.
+ * Otherwise we succeed. */
+ if ((good + eagain) >= ec->fragments) {
+ if (eagain == 0) {
+ if (fop->answer == NULL) {
+ fop->answer = cbk;
+ }
- ec_update_bad(fop, locked);
+ ec_update_good(fop, locked);
error = 0;
- }
- else
- {
- if (fop->uint32 == EC_LOCK_MODE_NONE)
- {
- error = EAGAIN;
- }
- else
- {
- fop->uint32 = EC_LOCK_MODE_INC;
+ } else {
+ switch (fop->uint32) {
+ case EC_LOCK_MODE_NONE:
+ error = EAGAIN;
+ break;
+ case EC_LOCK_MODE_ALL:
+ fop->uint32 = EC_LOCK_MODE_INC;
+ break;
+ default:
+ /* This shouldn't happen because eagain cannot be > 0
+ * when fop->uint32 is EC_LOCK_MODE_INC. */
+ error = EIO;
+ break;
}
}
- }
- else
- {
- error = EIO;
+ } else {
+ /* We have been unable to find enough candidates that will be able
+ * to take the lock. If we have quorum on some answer, we return
+ * it. Otherwise we check if ESTALE answers allow us to reach
+ * quorum. If so, we return ESTALE. */
+ if (fop->answer && fop->answer->op_ret < 0) {
+ error = fop->answer->op_errno;
+ } else if ((good + eagain + estale) >= ec->fragments) {
+ error = ESTALE;
+ } else {
+ error = EIO;
+ }
}
}
@@ -81,47 +120,26 @@ int32_t ec_lock_check(ec_fop_data_t * fop, ec_cbk_data_t ** cbk,
return error;
}
-uintptr_t ec_lock_handler(ec_fop_data_t * fop, ec_cbk_data_t * cbk,
- ec_combine_f combine)
+int32_t
+ec_lock_unlocked(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- uintptr_t mask = 0;
-
- if (fop->uint32 == EC_LOCK_MODE_INC)
- {
- if (cbk->op_ret < 0)
- {
- if (cbk->op_errno != ENOTCONN)
- {
- mask = fop->mask & ~fop->remaining & ~cbk->mask;
- fop->remaining = 0;
- }
- }
- }
-
- ec_combine(cbk, combine);
-
- return mask;
-}
-
-int32_t ec_lock_unlocked(call_frame_t * frame, void * cookie,
- xlator_t * this, int32_t op_ret, int32_t op_errno,
- dict_t * xdata)
-{
- if (op_ret < 0)
- {
- gf_log(this->name, GF_LOG_WARNING, "Failed to unlock an entry/inode");
+ if (op_ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, EC_MSG_UNLOCK_FAILED,
+ "Failed to unlock an entry/inode");
}
return 0;
}
-int32_t ec_lock_lk_unlocked(call_frame_t * frame, void * cookie,
- xlator_t * this, int32_t op_ret, int32_t op_errno,
- struct gf_flock * flock, dict_t * xdata)
+int32_t
+ec_lock_lk_unlocked(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *flock,
+ dict_t *xdata)
{
- if (op_ret < 0)
- {
- gf_log(this->name, GF_LOG_WARNING, "Failed to unlock an lk");
+ if (op_ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, EC_MSG_LK_UNLOCK_FAILED,
+ "Failed to unlock an lk");
}
return 0;
@@ -129,11 +147,12 @@ int32_t ec_lock_lk_unlocked(call_frame_t * frame, void * cookie,
/* FOP: entrylk */
-int32_t ec_entrylk_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
- int32_t op_ret, int32_t op_errno, dict_t * xdata)
+int32_t
+ec_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- ec_fop_data_t * fop = NULL;
- ec_cbk_data_t * cbk = NULL;
+ ec_fop_data_t *fop = NULL;
+ ec_cbk_data_t *cbk = NULL;
int32_t idx = (int32_t)(uintptr_t)cookie;
VALIDATE_OR_GOTO(this, out);
@@ -143,46 +162,36 @@ int32_t ec_entrylk_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
fop = frame->local;
- ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx,
- frame, op_ret, op_errno);
+ ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame,
+ op_ret, op_errno);
cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_ENTRYLK, idx, op_ret,
op_errno);
- if (cbk != NULL)
- {
- uintptr_t mask;
-
- if (xdata != NULL)
- {
+ if (cbk != NULL) {
+ if (xdata != NULL) {
cbk->xdata = dict_ref(xdata);
- if (cbk->xdata == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (cbk->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
}
- mask = ec_lock_handler(fop, cbk, NULL);
- if (mask != 0)
- {
- ec_entrylk(fop->req_frame, fop->xl, mask, 1, ec_lock_unlocked,
- NULL, fop->str[0], &fop->loc[0], fop->str[1],
- ENTRYLK_UNLOCK, fop->entrylk_type, fop->xdata);
- }
+ ec_combine(cbk, NULL);
}
out:
- if (fop != NULL)
- {
+ if (fop != NULL) {
ec_complete(fop);
}
return 0;
}
-void ec_wind_entrylk(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+void
+ec_wind_entrylk(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
{
ec_trace("WIND", fop, "idx=%d", idx);
@@ -192,20 +201,19 @@ void ec_wind_entrylk(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
fop->entrylk_type, fop->xdata);
}
-int32_t ec_manager_entrylk(ec_fop_data_t * fop, int32_t state)
+int32_t
+ec_manager_entrylk(ec_fop_data_t *fop, int32_t state)
{
- ec_cbk_data_t * cbk;
+ ec_cbk_data_t *cbk;
- switch (state)
- {
+ switch (state) {
case EC_STATE_INIT:
- if (fop->entrylk_cmd == ENTRYLK_LOCK)
- {
+ if (fop->entrylk_cmd == ENTRYLK_LOCK) {
fop->uint32 = EC_LOCK_MODE_ALL;
fop->entrylk_cmd = ENTRYLK_LOCK_NB;
}
- /* Fall through */
+ /* Fall through */
case EC_STATE_DISPATCH:
ec_dispatch_all(fop);
@@ -213,46 +221,37 @@ int32_t ec_manager_entrylk(ec_fop_data_t * fop, int32_t state)
return EC_STATE_PREPARE_ANSWER;
case EC_STATE_PREPARE_ANSWER:
- cbk = fop->answer;
- if ((cbk == NULL) ||
- ((cbk->op_ret < 0) && (cbk->op_errno == EAGAIN)))
- {
+ case -EC_STATE_PREPARE_ANSWER:
+ if (fop->entrylk_cmd != ENTRYLK_UNLOCK) {
uintptr_t mask;
- fop->error = ec_lock_check(fop, &cbk, &mask);
- if (fop->error != 0)
- {
- if (mask != 0)
- {
- if (fop->id == GF_FOP_ENTRYLK)
- {
- ec_entrylk(fop->req_frame, fop->xl, mask, 1,
- ec_lock_unlocked, NULL, fop->str[0],
- &fop->loc[0], fop->str[1],
- ENTRYLK_UNLOCK, fop->entrylk_type,
- fop->xdata);
- }
- else
- {
- ec_fentrylk(fop->req_frame, fop->xl, mask, 1,
+ ec_fop_set_error(fop, ec_lock_check(fop, &mask));
+ if (fop->error != 0) {
+ if (mask != 0) {
+ if (fop->id == GF_FOP_ENTRYLK) {
+ ec_entrylk(
+ fop->frame, fop->xl, mask, 1, ec_lock_unlocked,
+ NULL, fop->str[0], &fop->loc[0], fop->str[1],
+ ENTRYLK_UNLOCK, fop->entrylk_type, fop->xdata);
+ } else {
+ ec_fentrylk(fop->frame, fop->xl, mask, 1,
ec_lock_unlocked, NULL, fop->str[0],
fop->fd, fop->str[1], ENTRYLK_UNLOCK,
fop->entrylk_type, fop->xdata);
}
}
- if (fop->error > 0)
- {
- return EC_STATE_REPORT;
- }
+ if (fop->error < 0) {
+ fop->error = 0;
- fop->error = 0;
+ fop->entrylk_cmd = ENTRYLK_LOCK;
- fop->entrylk_cmd = ENTRYLK_LOCK;
+ ec_dispatch_inc(fop);
- ec_dispatch_inc(fop);
-
- return EC_STATE_PREPARE_ANSWER;
+ return EC_STATE_PREPARE_ANSWER;
+ }
}
+ } else {
+ ec_fop_prepare_answer(fop, _gf_true);
}
return EC_STATE_REPORT;
@@ -262,125 +261,104 @@ int32_t ec_manager_entrylk(ec_fop_data_t * fop, int32_t state)
GF_ASSERT(cbk != NULL);
- if (fop->id == GF_FOP_ENTRYLK)
- {
- if (fop->cbks.entrylk != NULL)
- {
- fop->cbks.entrylk(fop->req_frame, fop, fop->xl,
- cbk->op_ret, cbk->op_errno, cbk->xdata);
+ if (fop->id == GF_FOP_ENTRYLK) {
+ if (fop->cbks.entrylk != NULL) {
+ fop->cbks.entrylk(fop->req_frame, fop, fop->xl, cbk->op_ret,
+ cbk->op_errno, cbk->xdata);
}
- }
- else
- {
- if (fop->cbks.fentrylk != NULL)
- {
+ } else {
+ if (fop->cbks.fentrylk != NULL) {
fop->cbks.fentrylk(fop->req_frame, fop, fop->xl,
cbk->op_ret, cbk->op_errno, cbk->xdata);
}
}
- ec_wait_winds(fop);
-
return EC_STATE_END;
case -EC_STATE_INIT:
case -EC_STATE_DISPATCH:
- case -EC_STATE_PREPARE_ANSWER:
case -EC_STATE_REPORT:
GF_ASSERT(fop->error != 0);
- if (fop->id == GF_FOP_ENTRYLK)
- {
- if (fop->cbks.entrylk != NULL)
- {
+ if (fop->id == GF_FOP_ENTRYLK) {
+ if (fop->cbks.entrylk != NULL) {
fop->cbks.entrylk(fop->req_frame, fop, fop->xl, -1,
fop->error, NULL);
}
- }
- else
- {
- if (fop->cbks.fentrylk != NULL)
- {
+ } else {
+ if (fop->cbks.fentrylk != NULL) {
fop->cbks.fentrylk(fop->req_frame, fop, fop->xl, -1,
fop->error, NULL);
}
}
- ec_wait_winds(fop);
-
return EC_STATE_END;
default:
- gf_log(fop->xl->name, GF_LOG_ERROR, "Unhandled state %d for %s",
- state, ec_fop_name(fop->id));
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
return EC_STATE_END;
}
}
-void ec_entrylk(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_entrylk_cbk_t func, void * data,
- const char * volume, loc_t * loc, const char * basename,
- entrylk_cmd cmd, entrylk_type type, dict_t * xdata)
+void
+ec_entrylk(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_entrylk_cbk_t func, void *data,
+ const char *volume, loc_t *loc, const char *basename,
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
{
- ec_cbk_t callback = { .entrylk = func };
- ec_fop_data_t * fop = NULL;
- int32_t error = EIO;
+ ec_cbk_t callback = {.entrylk = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
- gf_log("ec", GF_LOG_TRACE, "EC(ENTRYLK) %p", frame);
+ gf_msg_trace("ec", 0, "EC(ENTRYLK) %p", frame);
- VALIDATE_OR_GOTO(this, out);
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_ENTRYLK,
- EC_FLAG_UPDATE_LOC_INODE, target, minimum,
- ec_wind_entrylk, ec_manager_entrylk, callback,
- data);
- if (fop == NULL)
- {
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_ENTRYLK, 0, target,
+ fop_flags, ec_wind_entrylk, ec_manager_entrylk,
+ callback, data);
+ if (fop == NULL) {
goto out;
}
fop->entrylk_cmd = cmd;
fop->entrylk_type = type;
- if (volume != NULL)
- {
+ if (volume != NULL) {
fop->str[0] = gf_strdup(volume);
- if (fop->str[0] == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to duplicate a string.");
+ if (fop->str[0] == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY,
+ "Failed to duplicate a string.");
goto out;
}
}
- if (loc != NULL)
- {
- if (loc_copy(&fop->loc[0], loc) != 0)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to copy a location.");
+ if (loc != NULL) {
+ if (loc_copy(&fop->loc[0], loc) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL,
+ "Failed to copy a location.");
goto out;
}
}
- if (basename != NULL)
- {
+ if (basename != NULL) {
fop->str[1] = gf_strdup(basename);
- if (fop->str[1] == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to duplicate a string.");
+ if (fop->str[1] == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY,
+ "Failed to duplicate a string.");
goto out;
}
}
- if (xdata != NULL)
- {
+ if (xdata != NULL) {
fop->xdata = dict_ref(xdata);
- if (fop->xdata == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
@@ -389,23 +367,21 @@ void ec_entrylk(call_frame_t * frame, xlator_t * this, uintptr_t target,
error = 0;
out:
- if (fop != NULL)
- {
+ if (fop != NULL) {
ec_manager(fop, error);
- }
- else
- {
- func(frame, NULL, this, -1, EIO, NULL);
+ } else {
+ func(frame, NULL, this, -1, error, NULL);
}
}
/* FOP: fentrylk */
-int32_t ec_fentrylk_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
- int32_t op_ret, int32_t op_errno, dict_t * xdata)
+int32_t
+ec_fentrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- ec_fop_data_t * fop = NULL;
- ec_cbk_data_t * cbk = NULL;
+ ec_fop_data_t *fop = NULL;
+ ec_cbk_data_t *cbk = NULL;
int32_t idx = (int32_t)(uintptr_t)cookie;
VALIDATE_OR_GOTO(this, out);
@@ -415,46 +391,36 @@ int32_t ec_fentrylk_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
fop = frame->local;
- ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx,
- frame, op_ret, op_errno);
+ ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame,
+ op_ret, op_errno);
cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_FENTRYLK, idx, op_ret,
op_errno);
- if (cbk != NULL)
- {
- uintptr_t mask;
-
- if (xdata != NULL)
- {
+ if (cbk != NULL) {
+ if (xdata != NULL) {
cbk->xdata = dict_ref(xdata);
- if (cbk->xdata == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (cbk->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
}
- mask = ec_lock_handler(fop, cbk, NULL);
- if (mask != 0)
- {
- ec_fentrylk(fop->req_frame, fop->xl, mask, 1, ec_lock_unlocked,
- NULL, fop->str[0], fop->fd, fop->str[1],
- ENTRYLK_UNLOCK, fop->entrylk_type, fop->xdata);
- }
+ ec_combine(cbk, NULL);
}
out:
- if (fop != NULL)
- {
+ if (fop != NULL) {
ec_complete(fop);
}
return 0;
}
-void ec_wind_fentrylk(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+void
+ec_wind_fentrylk(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
{
ec_trace("WIND", fop, "idx=%d", idx);
@@ -464,27 +430,25 @@ void ec_wind_fentrylk(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
fop->entrylk_type, fop->xdata);
}
-void ec_fentrylk(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_fentrylk_cbk_t func, void * data,
- const char * volume, fd_t * fd, const char * basename,
- entrylk_cmd cmd, entrylk_type type, dict_t * xdata)
+void
+ec_fentrylk(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fentrylk_cbk_t func, void *data,
+ const char *volume, fd_t *fd, const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata)
{
- ec_cbk_t callback = { .fentrylk = func };
- ec_fop_data_t * fop = NULL;
- int32_t error = EIO;
+ ec_cbk_t callback = {.fentrylk = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
- gf_log("ec", GF_LOG_TRACE, "EC(FENTRYLK) %p", frame);
+ gf_msg_trace("ec", 0, "EC(FENTRYLK) %p", frame);
- VALIDATE_OR_GOTO(this, out);
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_FENTRYLK,
- EC_FLAG_UPDATE_FD_INODE, target, minimum,
- ec_wind_fentrylk, ec_manager_entrylk, callback,
- data);
- if (fop == NULL)
- {
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_FENTRYLK, 0, target,
+ fop_flags, ec_wind_fentrylk, ec_manager_entrylk,
+ callback, data);
+ if (fop == NULL) {
goto out;
}
@@ -493,44 +457,40 @@ void ec_fentrylk(call_frame_t * frame, xlator_t * this, uintptr_t target,
fop->entrylk_cmd = cmd;
fop->entrylk_type = type;
- if (volume != NULL)
- {
+ if (volume != NULL) {
fop->str[0] = gf_strdup(volume);
- if (fop->str[0] == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to duplicate a string.");
+ if (fop->str[0] == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY,
+ "Failed to duplicate a string.");
goto out;
}
}
- if (fd != NULL)
- {
+ if (fd != NULL) {
fop->fd = fd_ref(fd);
- if (fop->fd == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "file descriptor.");
+ if (fop->fd == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL,
+ "Failed to reference a "
+ "file descriptor.");
goto out;
}
}
- if (basename != NULL)
- {
+ if (basename != NULL) {
fop->str[1] = gf_strdup(basename);
- if (fop->str[1] == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to duplicate a string.");
+ if (fop->str[1] == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY,
+ "Failed to duplicate a string.");
goto out;
}
}
- if (xdata != NULL)
- {
+ if (xdata != NULL) {
fop->xdata = dict_ref(xdata);
- if (fop->xdata == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
@@ -539,23 +499,21 @@ void ec_fentrylk(call_frame_t * frame, xlator_t * this, uintptr_t target,
error = 0;
out:
- if (fop != NULL)
- {
+ if (fop != NULL) {
ec_manager(fop, error);
- }
- else
- {
- func(frame, NULL, this, -1, EIO, NULL);
+ } else {
+ func(frame, NULL, this, -1, error, NULL);
}
}
/* FOP: inodelk */
-int32_t ec_inodelk_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
- int32_t op_ret, int32_t op_errno, dict_t * xdata)
+int32_t
+ec_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- ec_fop_data_t * fop = NULL;
- ec_cbk_data_t * cbk = NULL;
+ ec_fop_data_t *fop = NULL;
+ ec_cbk_data_t *cbk = NULL;
int32_t idx = (int32_t)(uintptr_t)cookie;
VALIDATE_OR_GOTO(this, out);
@@ -565,56 +523,36 @@ int32_t ec_inodelk_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
fop = frame->local;
- ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx,
- frame, op_ret, op_errno);
+ ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame,
+ op_ret, op_errno);
cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_INODELK, idx, op_ret,
op_errno);
- if (cbk != NULL)
- {
- uintptr_t mask;
-
- if (xdata != NULL)
- {
+ if (cbk != NULL) {
+ if (xdata != NULL) {
cbk->xdata = dict_ref(xdata);
- if (cbk->xdata == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (cbk->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
}
- mask = ec_lock_handler(fop, cbk, NULL);
- if (mask != 0)
- {
- ec_t * ec = fop->xl->private;
- struct gf_flock flock;
-
- flock.l_type = F_UNLCK;
- flock.l_whence = fop->flock.l_whence;
- flock.l_start = fop->flock.l_start * ec->fragments;
- flock.l_len = fop->flock.l_len * ec->fragments;
- flock.l_pid = 0;
- flock.l_owner.len = 0;
-
- ec_inodelk(fop->req_frame, fop->xl, mask, 1, ec_lock_unlocked,
- NULL, fop->str[0], &fop->loc[0], F_SETLK, &flock,
- fop->xdata);
- }
+ ec_combine(cbk, NULL);
}
out:
- if (fop != NULL)
- {
+ if (fop != NULL) {
ec_complete(fop);
}
return 0;
}
-void ec_wind_inodelk(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+void
+ec_wind_inodelk(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
{
ec_trace("WIND", fop, "idx=%d", idx);
@@ -624,24 +562,22 @@ void ec_wind_inodelk(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
fop->xdata);
}
-int32_t ec_manager_inodelk(ec_fop_data_t * fop, int32_t state)
+int32_t
+ec_manager_inodelk(ec_fop_data_t *fop, int32_t state)
{
- ec_cbk_data_t * cbk;
+ ec_cbk_data_t *cbk;
- switch (state)
- {
+ switch (state) {
case EC_STATE_INIT:
- fop->flock.l_len += ec_adjust_offset(fop->xl->private,
- &fop->flock.l_start, 1);
- fop->flock.l_len = ec_adjust_size(fop->xl->private,
- fop->flock.l_len, 1);
- if ((fop->int32 == F_SETLKW) && (fop->flock.l_type != F_UNLCK))
- {
+ fop->flock.l_len += ec_adjust_offset_down(
+ fop->xl->private, &fop->flock.l_start, _gf_true);
+ ec_adjust_offset_up(fop->xl->private, &fop->flock.l_len, _gf_true);
+ if ((fop->int32 == F_SETLKW) && (fop->flock.l_type != F_UNLCK)) {
fop->uint32 = EC_LOCK_MODE_ALL;
fop->int32 = F_SETLK;
}
- /* Fall through */
+ /* Fall through */
case EC_STATE_DISPATCH:
ec_dispatch_all(fop);
@@ -649,18 +585,14 @@ int32_t ec_manager_inodelk(ec_fop_data_t * fop, int32_t state)
return EC_STATE_PREPARE_ANSWER;
case EC_STATE_PREPARE_ANSWER:
- cbk = fop->answer;
- if ((cbk == NULL) ||
- ((cbk->op_ret < 0) && (cbk->op_errno == EAGAIN)))
- {
+ case -EC_STATE_PREPARE_ANSWER:
+ if (fop->flock.l_type != F_UNLCK) {
uintptr_t mask;
- fop->error = ec_lock_check(fop, &cbk, &mask);
- if (fop->error != 0)
- {
- if (mask != 0)
- {
- ec_t * ec = fop->xl->private;
+ ec_fop_set_error(fop, ec_lock_check(fop, &mask));
+ if (fop->error != 0) {
+ if (mask != 0) {
+ ec_t *ec = fop->xl->private;
struct gf_flock flock;
flock.l_type = F_UNLCK;
@@ -670,33 +602,31 @@ int32_t ec_manager_inodelk(ec_fop_data_t * fop, int32_t state)
flock.l_pid = 0;
flock.l_owner.len = 0;
- if (fop->id == GF_FOP_INODELK)
- {
- ec_inodelk(fop->req_frame, fop->xl, mask, 1,
+ if (fop->id == GF_FOP_INODELK) {
+ ec_inodelk(fop->frame, fop->xl,
+ &fop->frame->root->lk_owner, mask, 1,
ec_lock_unlocked, NULL, fop->str[0],
&fop->loc[0], F_SETLK, &flock,
fop->xdata);
- }
- else
- {
- ec_finodelk(fop->req_frame, fop->xl, mask, 1,
+ } else {
+ ec_finodelk(fop->frame, fop->xl,
+ &fop->frame->root->lk_owner, mask, 1,
ec_lock_unlocked, NULL, fop->str[0],
fop->fd, F_SETLK, &flock, fop->xdata);
}
}
- if (fop->error > 0)
- {
- return EC_STATE_REPORT;
- }
+ if (fop->error < 0) {
+ fop->error = 0;
- fop->error = 0;
+ fop->int32 = F_SETLKW;
- fop->int32 = F_SETLKW;
+ ec_dispatch_inc(fop);
- ec_dispatch_inc(fop);
-
- return EC_STATE_PREPARE_ANSWER;
+ return EC_STATE_PREPARE_ANSWER;
+ }
}
+ } else {
+ ec_fop_prepare_answer(fop, _gf_true);
}
return EC_STATE_REPORT;
@@ -706,128 +636,108 @@ int32_t ec_manager_inodelk(ec_fop_data_t * fop, int32_t state)
GF_ASSERT(cbk != NULL);
- if (fop->id == GF_FOP_INODELK)
- {
- if (fop->cbks.inodelk != NULL)
- {
- fop->cbks.inodelk(fop->req_frame, fop, fop->xl,
- cbk->op_ret, cbk->op_errno, cbk->xdata);
+ if (fop->id == GF_FOP_INODELK) {
+ if (fop->cbks.inodelk != NULL) {
+ fop->cbks.inodelk(fop->req_frame, fop, fop->xl, cbk->op_ret,
+ cbk->op_errno, cbk->xdata);
}
- }
- else
- {
- if (fop->cbks.finodelk != NULL)
- {
+ } else {
+ if (fop->cbks.finodelk != NULL) {
fop->cbks.finodelk(fop->req_frame, fop, fop->xl,
cbk->op_ret, cbk->op_errno, cbk->xdata);
}
}
- ec_wait_winds(fop);
-
return EC_STATE_END;
case -EC_STATE_INIT:
case -EC_STATE_DISPATCH:
- case -EC_STATE_PREPARE_ANSWER:
case -EC_STATE_REPORT:
GF_ASSERT(fop->error != 0);
- if (fop->id == GF_FOP_INODELK)
- {
- if (fop->cbks.inodelk != NULL)
- {
+ if (fop->id == GF_FOP_INODELK) {
+ if (fop->cbks.inodelk != NULL) {
fop->cbks.inodelk(fop->req_frame, fop, fop->xl, -1,
fop->error, NULL);
}
- }
- else
- {
- if (fop->cbks.finodelk != NULL)
- {
+ } else {
+ if (fop->cbks.finodelk != NULL) {
fop->cbks.finodelk(fop->req_frame, fop, fop->xl, -1,
fop->error, NULL);
}
}
- ec_wait_winds(fop);
-
return EC_STATE_END;
default:
- gf_log(fop->xl->name, GF_LOG_ERROR, "Unhandled state %d for %s",
- state, ec_fop_name(fop->id));
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
return EC_STATE_END;
}
}
-void ec_inodelk(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_inodelk_cbk_t func, void * data,
- const char * volume, loc_t * loc, int32_t cmd,
- struct gf_flock * flock, dict_t * xdata)
+void
+ec_inodelk(call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner,
+ uintptr_t target, uint32_t fop_flags, fop_inodelk_cbk_t func,
+ void *data, const char *volume, loc_t *loc, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata)
{
- ec_cbk_t callback = { .inodelk = func };
- ec_fop_data_t * fop = NULL;
- int32_t error = EIO;
+ ec_cbk_t callback = {.inodelk = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
- gf_log("ec", GF_LOG_TRACE, "EC(INODELK) %p", frame);
+ gf_msg_trace("ec", 0, "EC(INODELK) %p", frame);
VALIDATE_OR_GOTO(this, out);
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_INODELK,
- EC_FLAG_UPDATE_LOC_INODE, target, minimum,
- ec_wind_inodelk, ec_manager_inodelk, callback,
- data);
- if (fop == NULL)
- {
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_INODELK, 0, target,
+ fop_flags, ec_wind_inodelk, ec_manager_inodelk,
+ callback, data);
+ if (fop == NULL) {
goto out;
}
fop->int32 = cmd;
+ ec_owner_copy(fop->frame, owner);
- if (volume != NULL)
- {
+ if (volume != NULL) {
fop->str[0] = gf_strdup(volume);
- if (fop->str[0] == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to duplicate a string.");
+ if (fop->str[0] == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY,
+ "Failed to duplicate a string.");
goto out;
}
}
- if (loc != NULL)
- {
- if (loc_copy(&fop->loc[0], loc) != 0)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to copy a location.");
+ if (loc != NULL) {
+ if (loc_copy(&fop->loc[0], loc) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL,
+ "Failed to copy a location.");
goto out;
}
}
- if (flock != NULL)
- {
+ if (flock != NULL) {
fop->flock.l_type = flock->l_type;
fop->flock.l_whence = flock->l_whence;
fop->flock.l_start = flock->l_start;
fop->flock.l_len = flock->l_len;
fop->flock.l_pid = flock->l_pid;
fop->flock.l_owner.len = flock->l_owner.len;
- if (flock->l_owner.len > 0)
- {
+ if (flock->l_owner.len > 0) {
memcpy(fop->flock.l_owner.data, flock->l_owner.data,
flock->l_owner.len);
}
}
- if (xdata != NULL)
- {
+ if (xdata != NULL) {
fop->xdata = dict_ref(xdata);
- if (fop->xdata == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
@@ -836,23 +746,21 @@ void ec_inodelk(call_frame_t * frame, xlator_t * this, uintptr_t target,
error = 0;
out:
- if (fop != NULL)
- {
+ if (fop != NULL) {
ec_manager(fop, error);
- }
- else
- {
- func(frame, NULL, this, -1, EIO, NULL);
+ } else {
+ func(frame, NULL, this, -1, error, NULL);
}
}
/* FOP: finodelk */
-int32_t ec_finodelk_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
- int32_t op_ret, int32_t op_errno, dict_t * xdata)
+int32_t
+ec_finodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- ec_fop_data_t * fop = NULL;
- ec_cbk_data_t * cbk = NULL;
+ ec_fop_data_t *fop = NULL;
+ ec_cbk_data_t *cbk = NULL;
int32_t idx = (int32_t)(uintptr_t)cookie;
VALIDATE_OR_GOTO(this, out);
@@ -862,56 +770,36 @@ int32_t ec_finodelk_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
fop = frame->local;
- ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx,
- frame, op_ret, op_errno);
+ ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame,
+ op_ret, op_errno);
cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_FINODELK, idx, op_ret,
op_errno);
- if (cbk != NULL)
- {
- uintptr_t mask;
-
- if (xdata != NULL)
- {
+ if (cbk != NULL) {
+ if (xdata != NULL) {
cbk->xdata = dict_ref(xdata);
- if (cbk->xdata == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (cbk->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
}
- mask = ec_lock_handler(fop, cbk, NULL);
- if (mask != 0)
- {
- ec_t * ec = fop->xl->private;
- struct gf_flock flock;
-
- flock.l_type = F_UNLCK;
- flock.l_whence = fop->flock.l_whence;
- flock.l_start = fop->flock.l_start * ec->fragments;
- flock.l_len = fop->flock.l_len * ec->fragments;
- flock.l_pid = 0;
- flock.l_owner.len = 0;
-
- ec_finodelk(fop->req_frame, fop->xl, mask, 1, ec_lock_unlocked,
- NULL, fop->str[0], fop->fd, F_SETLK, &flock,
- fop->xdata);
- }
+ ec_combine(cbk, NULL);
}
out:
- if (fop != NULL)
- {
+ if (fop != NULL) {
ec_complete(fop);
}
return 0;
}
-void ec_wind_finodelk(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+void
+ec_wind_finodelk(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
{
ec_trace("WIND", fop, "idx=%d", idx);
@@ -921,76 +809,71 @@ void ec_wind_finodelk(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
fop->xdata);
}
-void ec_finodelk(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_finodelk_cbk_t func, void * data,
- const char * volume, fd_t * fd, int32_t cmd,
- struct gf_flock * flock, dict_t * xdata)
+void
+ec_finodelk(call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner,
+ uintptr_t target, uint32_t fop_flags, fop_finodelk_cbk_t func,
+ void *data, const char *volume, fd_t *fd, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata)
{
- ec_cbk_t callback = { .finodelk = func };
- ec_fop_data_t * fop = NULL;
- int32_t error = EIO;
+ ec_cbk_t callback = {.finodelk = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
- gf_log("ec", GF_LOG_TRACE, "EC(FINODELK) %p", frame);
+ gf_msg_trace("ec", 0, "EC(FINODELK) %p", frame);
VALIDATE_OR_GOTO(this, out);
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_FINODELK,
- EC_FLAG_UPDATE_FD_INODE, target, minimum,
- ec_wind_finodelk, ec_manager_inodelk, callback,
- data);
- if (fop == NULL)
- {
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_FINODELK, 0, target,
+ fop_flags, ec_wind_finodelk, ec_manager_inodelk,
+ callback, data);
+ if (fop == NULL) {
goto out;
}
fop->use_fd = 1;
fop->int32 = cmd;
+ ec_owner_copy(fop->frame, owner);
- if (volume != NULL)
- {
+ if (volume != NULL) {
fop->str[0] = gf_strdup(volume);
- if (fop->str[0] == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to duplicate a string.");
+ if (fop->str[0] == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY,
+ "Failed to duplicate a string.");
goto out;
}
}
- if (fd != NULL)
- {
+ if (fd != NULL) {
fop->fd = fd_ref(fd);
- if (fop->fd == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "file descriptor.");
+ if (fop->fd == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "file descriptor.");
goto out;
}
}
- if (flock != NULL)
- {
+ if (flock != NULL) {
fop->flock.l_type = flock->l_type;
fop->flock.l_whence = flock->l_whence;
fop->flock.l_start = flock->l_start;
fop->flock.l_len = flock->l_len;
fop->flock.l_pid = flock->l_pid;
fop->flock.l_owner.len = flock->l_owner.len;
- if (flock->l_owner.len > 0)
- {
+ if (flock->l_owner.len > 0) {
memcpy(fop->flock.l_owner.data, flock->l_owner.data,
flock->l_owner.len);
}
}
- if (xdata != NULL)
- {
+ if (xdata != NULL) {
fop->xdata = dict_ref(xdata);
- if (fop->xdata == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
@@ -999,25 +882,22 @@ void ec_finodelk(call_frame_t * frame, xlator_t * this, uintptr_t target,
error = 0;
out:
- if (fop != NULL)
- {
+ if (fop != NULL) {
ec_manager(fop, error);
- }
- else
- {
- func(frame, NULL, this, -1, EIO, NULL);
+ } else {
+ func(frame, NULL, this, -1, error, NULL);
}
}
/* FOP: lk */
-int32_t ec_combine_lk(ec_fop_data_t * fop, ec_cbk_data_t * dst,
- ec_cbk_data_t * src)
+int32_t
+ec_combine_lk(ec_fop_data_t *fop, ec_cbk_data_t *dst, ec_cbk_data_t *src)
{
- if (!ec_flock_compare(&dst->flock, &src->flock))
- {
- gf_log(fop->xl->name, GF_LOG_NOTICE, "Mismatching lock in "
- "answers of 'GF_FOP_LK'");
+ if (!ec_flock_compare(&dst->flock, &src->flock)) {
+ gf_msg(fop->xl->name, GF_LOG_NOTICE, 0, EC_MSG_LOCK_MISMATCH,
+ "Mismatching lock in "
+ "answers of 'GF_FOP_LK'");
return 0;
}
@@ -1025,12 +905,12 @@ int32_t ec_combine_lk(ec_fop_data_t * fop, ec_cbk_data_t * dst,
return 1;
}
-int32_t ec_lk_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
- int32_t op_ret, int32_t op_errno, struct gf_flock * flock,
- dict_t * xdata)
+int32_t
+ec_lk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct gf_flock *flock, dict_t *xdata)
{
- ec_fop_data_t * fop = NULL;
- ec_cbk_data_t * cbk = NULL;
+ ec_fop_data_t *fop = NULL;
+ ec_cbk_data_t *cbk = NULL;
int32_t idx = (int32_t)(uintptr_t)cookie;
VALIDATE_OR_GOTO(this, out);
@@ -1040,72 +920,50 @@ int32_t ec_lk_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
fop = frame->local;
- ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx,
- frame, op_ret, op_errno);
+ ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame,
+ op_ret, op_errno);
cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_LK, idx, op_ret,
op_errno);
- if (cbk != NULL)
- {
- uintptr_t mask;
-
- if (op_ret >= 0)
- {
- if (flock != NULL)
- {
+ if (cbk != NULL) {
+ if (op_ret >= 0) {
+ if (flock != NULL) {
cbk->flock.l_type = flock->l_type;
cbk->flock.l_whence = flock->l_whence;
cbk->flock.l_start = flock->l_start;
cbk->flock.l_len = flock->l_len;
cbk->flock.l_pid = flock->l_pid;
cbk->flock.l_owner.len = flock->l_owner.len;
- if (flock->l_owner.len > 0)
- {
+ if (flock->l_owner.len > 0) {
memcpy(cbk->flock.l_owner.data, flock->l_owner.data,
flock->l_owner.len);
}
}
}
- if (xdata != NULL)
- {
+ if (xdata != NULL) {
cbk->xdata = dict_ref(xdata);
- if (cbk->xdata == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (cbk->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
}
- mask = ec_lock_handler(fop, cbk, ec_combine_lk);
- if (mask != 0)
- {
- ec_t * ec = fop->xl->private;
- struct gf_flock flock;
-
- flock.l_type = F_UNLCK;
- flock.l_whence = fop->flock.l_whence;
- flock.l_start = fop->flock.l_start * ec->fragments;
- flock.l_len = fop->flock.l_len * ec->fragments;
- flock.l_pid = 0;
- flock.l_owner.len = 0;
-
- ec_lk(fop->req_frame, fop->xl, mask, 1, ec_lock_lk_unlocked, NULL,
- fop->fd, F_SETLK, &flock, fop->xdata);
- }
+ ec_combine(cbk, ec_combine_lk);
}
out:
- if (fop != NULL)
- {
+ if (fop != NULL) {
ec_complete(fop);
}
return 0;
}
-void ec_wind_lk(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
+void
+ec_wind_lk(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
{
ec_trace("WIND", fop, "idx=%d", idx);
@@ -1114,24 +972,19 @@ void ec_wind_lk(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
fop->int32, &fop->flock, fop->xdata);
}
-int32_t ec_manager_lk(ec_fop_data_t * fop, int32_t state)
+int32_t
+ec_manager_lk(ec_fop_data_t *fop, int32_t state)
{
- ec_cbk_data_t * cbk;
+ ec_cbk_data_t *cbk;
- switch (state)
- {
+ switch (state) {
case EC_STATE_INIT:
- fop->flock.l_len += ec_adjust_offset(fop->xl->private,
- &fop->flock.l_start, 1);
- fop->flock.l_len = ec_adjust_size(fop->xl->private,
- fop->flock.l_len, 1);
- if ((fop->int32 == F_SETLKW) && (fop->flock.l_type != F_UNLCK))
- {
+ if ((fop->int32 == F_SETLKW) && (fop->flock.l_type != F_UNLCK)) {
fop->uint32 = EC_LOCK_MODE_ALL;
fop->int32 = F_SETLK;
}
- /* Fall through */
+ /* Fall through */
case EC_STATE_DISPATCH:
ec_dispatch_all(fop);
@@ -1139,44 +992,38 @@ int32_t ec_manager_lk(ec_fop_data_t * fop, int32_t state)
return EC_STATE_PREPARE_ANSWER;
case EC_STATE_PREPARE_ANSWER:
- cbk = fop->answer;
- if ((cbk == NULL) ||
- ((cbk->op_ret < 0) && (cbk->op_errno == EAGAIN)))
- {
+ case -EC_STATE_PREPARE_ANSWER:
+ if (fop->flock.l_type != F_UNLCK) {
uintptr_t mask;
- fop->error = ec_lock_check(fop, &cbk, &mask);
- if (fop->error != 0)
- {
- if (mask != 0)
- {
- ec_t * ec = fop->xl->private;
- struct gf_flock flock;
+ ec_fop_set_error(fop, ec_lock_check(fop, &mask));
+ if (fop->error != 0) {
+ if (mask != 0) {
+ struct gf_flock flock = {0};
flock.l_type = F_UNLCK;
flock.l_whence = fop->flock.l_whence;
- flock.l_start = fop->flock.l_start * ec->fragments;
- flock.l_len = fop->flock.l_len * ec->fragments;
- flock.l_pid = 0;
- flock.l_owner.len = 0;
+ flock.l_start = fop->flock.l_start;
+ flock.l_len = fop->flock.l_len;
+ flock.l_pid = fop->flock.l_pid;
+ lk_owner_copy(&flock.l_owner, &fop->flock.l_owner);
- ec_lk(fop->req_frame, fop->xl, mask, 1,
- ec_lock_lk_unlocked, NULL, fop->fd, F_SETLK,
- &flock, fop->xdata);
- }
- if (fop->error > 0)
- {
- return EC_STATE_REPORT;
+ ec_lk(fop->frame, fop->xl, mask, 1, ec_lock_lk_unlocked,
+ NULL, fop->fd, F_SETLK, &flock, fop->xdata);
}
- fop->error = 0;
+ if (fop->error < 0) {
+ fop->error = 0;
- fop->int32 = F_SETLKW;
+ fop->int32 = F_SETLKW;
- ec_dispatch_inc(fop);
+ ec_dispatch_inc(fop);
- return EC_STATE_PREPARE_ANSWER;
+ return EC_STATE_PREPARE_ANSWER;
+ }
}
+ } else {
+ ec_fop_prepare_answer(fop, _gf_true);
}
return EC_STATE_REPORT;
@@ -1186,59 +1033,50 @@ int32_t ec_manager_lk(ec_fop_data_t * fop, int32_t state)
GF_ASSERT(cbk != NULL);
- if (fop->cbks.lk != NULL)
- {
+ if (fop->cbks.lk != NULL) {
fop->cbks.lk(fop->req_frame, fop, fop->xl, cbk->op_ret,
cbk->op_errno, &cbk->flock, cbk->xdata);
}
- ec_wait_winds(fop);
-
return EC_STATE_END;
case -EC_STATE_INIT:
case -EC_STATE_DISPATCH:
- case -EC_STATE_PREPARE_ANSWER:
case -EC_STATE_REPORT:
GF_ASSERT(fop->error != 0);
- if (fop->cbks.lk != NULL)
- {
- fop->cbks.lk(fop->req_frame, fop, fop->xl, -1, fop->error,
- NULL, NULL);
+ if (fop->cbks.lk != NULL) {
+ fop->cbks.lk(fop->req_frame, fop, fop->xl, -1, fop->error, NULL,
+ NULL);
}
- ec_wait_winds(fop);
-
return EC_STATE_END;
default:
- gf_log(fop->xl->name, GF_LOG_ERROR, "Unhandled state %d for %s",
- state, ec_fop_name(fop->id));
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
return EC_STATE_END;
}
}
-void ec_lk(call_frame_t * frame, xlator_t * this, uintptr_t target,
- int32_t minimum, fop_lk_cbk_t func, void * data, fd_t * fd,
- int32_t cmd, struct gf_flock * flock, dict_t * xdata)
+void
+ec_lk(call_frame_t *frame, xlator_t *this, uintptr_t target, uint32_t fop_flags,
+ fop_lk_cbk_t func, void *data, fd_t *fd, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata)
{
- ec_cbk_t callback = { .lk = func };
- ec_fop_data_t * fop = NULL;
- int32_t error = EIO;
+ ec_cbk_t callback = {.lk = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
- gf_log("ec", GF_LOG_TRACE, "EC(LK) %p", frame);
+ gf_msg_trace("ec", 0, "EC(LK) %p", frame);
- VALIDATE_OR_GOTO(this, out);
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_LK, EC_FLAG_UPDATE_FD_INODE,
- target, minimum, ec_wind_lk, ec_manager_lk,
- callback, data);
- if (fop == NULL)
- {
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_LK, 0, target, fop_flags,
+ ec_wind_lk, ec_manager_lk, callback, data);
+ if (fop == NULL) {
goto out;
}
@@ -1246,38 +1084,34 @@ void ec_lk(call_frame_t * frame, xlator_t * this, uintptr_t target,
fop->int32 = cmd;
- if (fd != NULL)
- {
+ if (fd != NULL) {
fop->fd = fd_ref(fd);
- if (fop->fd == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "file descriptor.");
+ if (fop->fd == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL,
+ "Failed to reference a "
+ "file descriptor.");
goto out;
}
}
- if (flock != NULL)
- {
+ if (flock != NULL) {
fop->flock.l_type = flock->l_type;
fop->flock.l_whence = flock->l_whence;
fop->flock.l_start = flock->l_start;
fop->flock.l_len = flock->l_len;
fop->flock.l_pid = flock->l_pid;
fop->flock.l_owner.len = flock->l_owner.len;
- if (flock->l_owner.len > 0)
- {
+ if (flock->l_owner.len > 0) {
memcpy(fop->flock.l_owner.data, flock->l_owner.data,
flock->l_owner.len);
}
}
- if (xdata != NULL)
- {
+ if (xdata != NULL) {
fop->xdata = dict_ref(xdata);
- if (fop->xdata == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to reference a "
- "dictionary.");
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
goto out;
}
@@ -1286,12 +1120,9 @@ void ec_lk(call_frame_t * frame, xlator_t * this, uintptr_t target,
error = 0;
out:
- if (fop != NULL)
- {
+ if (fop != NULL) {
ec_manager(fop, error);
- }
- else
- {
- func(frame, NULL, this, -1, EIO, NULL, NULL);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, NULL);
}
}
diff --git a/xlators/cluster/ec/src/ec-mem-types.h b/xlators/cluster/ec/src/ec-mem-types.h
index df65a031590..3252c4c1c58 100644
--- a/xlators/cluster/ec/src/ec-mem-types.h
+++ b/xlators/cluster/ec/src/ec-mem-types.h
@@ -11,16 +11,19 @@
#ifndef __EC_MEM_TYPES_H__
#define __EC_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
-enum gf_ec_mem_types_
-{
+enum gf_ec_mem_types_ {
ec_mt_ec_t = gf_common_mt_end + 1,
ec_mt_xlator_t,
ec_mt_ec_inode_t,
ec_mt_ec_fd_t,
- ec_mt_ec_heal_t,
ec_mt_subvol_healer_t,
+ ec_mt_ec_gf_t,
+ ec_mt_ec_code_t,
+ ec_mt_ec_code_builder_t,
+ ec_mt_ec_matrix_t,
+ ec_mt_ec_stripe_t,
ec_mt_end
};
diff --git a/xlators/cluster/ec/src/ec-messages.h b/xlators/cluster/ec/src/ec-messages.h
new file mode 100644
index 00000000000..72e98f11286
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-messages.h
@@ -0,0 +1,61 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _EC_MESSAGES_H_
+#define _EC_MESSAGES_H_
+
+#include <glusterfs/glfs-message-id.h>
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(EC, EC_MSG_INVALID_CONFIG, EC_MSG_HEAL_FAIL,
+ EC_MSG_DICT_COMBINE_FAIL, EC_MSG_STIME_COMBINE_FAIL,
+ EC_MSG_INVALID_DICT_NUMS, EC_MSG_IATT_COMBINE_FAIL,
+ EC_MSG_INVALID_FORMAT, EC_MSG_DICT_GET_FAILED,
+ EC_MSG_UNHANDLED_STATE, EC_MSG_FILE_DESC_REF_FAIL,
+ EC_MSG_LOC_COPY_FAIL, EC_MSG_BUF_REF_FAIL, EC_MSG_DICT_REF_FAIL,
+ EC_MSG_LK_UNLOCK_FAILED, EC_MSG_UNLOCK_FAILED,
+ EC_MSG_LOC_PARENT_INODE_MISSING, EC_MSG_INVALID_LOC_NAME,
+ EC_MSG_NO_MEMORY, EC_MSG_GFID_MISMATCH, EC_MSG_UNSUPPORTED_VERSION,
+ EC_MSG_FD_CREATE_FAIL, EC_MSG_READDIRP_REQ_PREP_FAIL,
+ EC_MSG_LOOKUP_REQ_PREP_FAIL, EC_MSG_INODE_REF_FAIL,
+ EC_MSG_LOOKUP_READAHEAD_FAIL, EC_MSG_FRAME_MISMATCH,
+ EC_MSG_XLATOR_MISMATCH, EC_MSG_VECTOR_MISMATCH, EC_MSG_IATT_MISMATCH,
+ EC_MSG_FD_MISMATCH, EC_MSG_DICT_MISMATCH, EC_MSG_INDEX_DIR_GET_FAIL,
+ EC_MSG_PREOP_LOCK_FAILED, EC_MSG_CHILDS_INSUFFICIENT,
+ EC_MSG_OP_EXEC_UNAVAIL, EC_MSG_UNLOCK_DELAY_FAILED,
+ EC_MSG_SIZE_VERS_UPDATE_FAIL, EC_MSG_INVALID_REQUEST,
+ EC_MSG_INVALID_LOCK_TYPE, EC_MSG_SIZE_VERS_GET_FAIL,
+ EC_MSG_FILE_SIZE_GET_FAIL, EC_MSG_FOP_MISMATCH,
+ EC_MSG_SUBVOL_ID_DICT_SET_FAIL, EC_MSG_SUBVOL_BUILD_FAIL,
+ EC_MSG_XLATOR_INIT_FAIL, EC_MSG_NO_PARENTS, EC_MSG_TIMER_CREATE_FAIL,
+ EC_MSG_TOO_MANY_SUBVOLS, EC_MSG_DATA_UNAVAILABLE,
+ EC_MSG_INODE_REMOVE_FAIL, EC_MSG_INVALID_REDUNDANCY,
+ EC_MSG_XLATOR_PARSE_OPT_FAIL, EC_MSG_OP_FAIL_ON_SUBVOLS,
+ EC_MSG_INVALID_INODE, EC_MSG_LOCK_MISMATCH, EC_MSG_XDATA_MISMATCH,
+ EC_MSG_HEALING_INFO, EC_MSG_HEAL_SUCCESS, EC_MSG_FULL_SWEEP_START,
+ EC_MSG_FULL_SWEEP_STOP, EC_MSG_INVALID_FOP, EC_MSG_EC_UP,
+ EC_MSG_EC_DOWN, EC_MSG_SIZE_XATTR_GET_FAIL,
+ EC_MSG_VER_XATTR_GET_FAIL, EC_MSG_CONFIG_XATTR_GET_FAIL,
+ EC_MSG_CONFIG_XATTR_INVALID, EC_MSG_EXTENSION, EC_MSG_EXTENSION_NONE,
+ EC_MSG_EXTENSION_UNKNOWN, EC_MSG_EXTENSION_UNSUPPORTED,
+ EC_MSG_EXTENSION_FAILED, EC_MSG_NO_GF, EC_MSG_MATRIX_FAILED,
+ EC_MSG_DYN_CREATE_FAILED, EC_MSG_DYN_CODEGEN_FAILED,
+ EC_MSG_THREAD_CLEANUP_FAILED, EC_MSG_FD_BAD);
+
+#endif /* !_EC_MESSAGES_H_ */
diff --git a/xlators/cluster/ec/src/ec-method.c b/xlators/cluster/ec/src/ec-method.c
index faab0115cdd..55faed0b193 100644
--- a/xlators/cluster/ec/src/ec-method.c
+++ b/xlators/cluster/ec/src/ec-method.c
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2012-2014 DataLab, s.l. <http://www.datalab.es>
+ Copyright (c) 2012-2015 DataLab, s.l. <http://www.datalab.es>
This file is part of GlusterFS.
This file is licensed to you under your choice of the GNU Lesser
@@ -11,149 +11,423 @@
#include <string.h>
#include <inttypes.h>
-#include "ec-gf.h"
+#include "ec-types.h"
+#include "ec-mem-types.h"
+#include "ec-galois.h"
+#include "ec-code.h"
#include "ec-method.h"
+#include "ec-helpers.h"
-static uint32_t GfPow[EC_GF_SIZE << 1];
-static uint32_t GfLog[EC_GF_SIZE << 1];
+static void
+ec_method_matrix_normal(ec_gf_t *gf, uint32_t *matrix, uint32_t columns,
+ uint32_t *values, uint32_t count)
+{
+ uint32_t i, j, v, tmp;
+
+ columns--;
+ for (i = 0; i < count; i++) {
+ v = *values++;
+ *matrix++ = tmp = ec_gf_exp(gf, v, columns);
+ for (j = 0; j < columns; j++) {
+ *matrix++ = tmp = ec_gf_div(gf, tmp, v);
+ }
+ }
+}
+
+static void
+ec_method_matrix_inverse(ec_gf_t *gf, uint32_t *matrix, uint32_t *values,
+ uint32_t count)
+{
+ uint32_t a[count];
+ uint32_t i, j, p, last, tmp;
+
+ last = count - 1;
+ for (i = 0; i < last; i++) {
+ a[i] = 1;
+ }
+ a[i] = values[0];
+ for (i = last; i > 0; i--) {
+ for (j = i - 1; j < last; j++) {
+ a[j] = a[j + 1] ^ ec_gf_mul(gf, values[i], a[j]);
+ }
+ a[j] = ec_gf_mul(gf, values[i], a[j]);
+ }
+ for (i = 0; i < count; i++) {
+ p = a[0];
+ matrix += count;
+ *matrix = tmp = p ^ values[i];
+ for (j = 1; j < last; j++) {
+ matrix += count;
+ *matrix = tmp = a[j] ^ ec_gf_mul(gf, values[i], tmp);
+ p = tmp ^ ec_gf_mul(gf, values[i], p);
+ }
+ for (j = 0; j < last; j++) {
+ *matrix = ec_gf_div(gf, *matrix, p);
+ matrix -= count;
+ }
+ *matrix = ec_gf_div(gf, 1, p);
+ matrix++;
+ }
+}
-void ec_method_initialize(void)
+static void
+ec_method_matrix_init(ec_matrix_list_t *list, ec_matrix_t *matrix,
+ uintptr_t mask, uint32_t *rows, gf_boolean_t inverse)
{
uint32_t i;
- GfPow[0] = 1;
- GfLog[0] = EC_GF_SIZE;
- for (i = 1; i < EC_GF_SIZE; i++)
- {
- GfPow[i] = GfPow[i - 1] << 1;
- if (GfPow[i] >= EC_GF_SIZE)
- {
- GfPow[i] ^= EC_GF_MOD;
+ matrix->refs = 1;
+ matrix->mask = mask;
+ matrix->code = list->code;
+ matrix->columns = list->columns;
+ INIT_LIST_HEAD(&matrix->lru);
+
+ if (inverse) {
+ matrix->rows = list->columns;
+ ec_method_matrix_inverse(matrix->code->gf, matrix->values, rows,
+ matrix->rows);
+ for (i = 0; i < matrix->rows; i++) {
+ matrix->row_data[i].values = matrix->values + i * matrix->columns;
+ matrix->row_data[i].func.interleaved = ec_code_build_interleaved(
+ matrix->code, EC_METHOD_WORD_SIZE, matrix->row_data[i].values,
+ matrix->columns);
+ }
+ } else {
+ matrix->rows = list->rows;
+ ec_method_matrix_normal(matrix->code->gf, matrix->values,
+ matrix->columns, rows, matrix->rows);
+ for (i = 0; i < matrix->rows; i++) {
+ matrix->row_data[i].values = matrix->values + i * matrix->columns;
+ matrix->row_data[i].func.linear = ec_code_build_linear(
+ matrix->code, EC_METHOD_WORD_SIZE, matrix->row_data[i].values,
+ matrix->columns);
}
- GfPow[i + EC_GF_SIZE - 1] = GfPow[i];
- GfLog[GfPow[i] + EC_GF_SIZE - 1] = GfLog[GfPow[i]] = i;
}
}
-static uint32_t ec_method_mul(uint32_t a, uint32_t b)
+static void
+ec_method_matrix_release(ec_matrix_t *matrix)
{
- if (a && b)
- {
- return GfPow[GfLog[a] + GfLog[b]];
+ uint32_t i;
+
+ for (i = 0; i < matrix->rows; i++) {
+ if (matrix->row_data[i].func.linear != NULL) {
+ ec_code_release(matrix->code, &matrix->row_data[i].func);
+ matrix->row_data[i].func.linear = NULL;
+ }
}
- return 0;
}
-static uint32_t ec_method_div(uint32_t a, uint32_t b)
+static void
+ec_method_matrix_destroy(ec_matrix_list_t *list, ec_matrix_t *matrix)
+{
+ list_del_init(&matrix->lru);
+
+ ec_method_matrix_release(matrix);
+
+ mem_put(matrix);
+
+ list->count--;
+}
+
+static void
+ec_method_matrix_unref(ec_matrix_list_t *list, ec_matrix_t *matrix)
{
- if (b)
- {
- if (a)
- {
- return GfPow[EC_GF_SIZE - 1 + GfLog[a] - GfLog[b]];
+ if (--matrix->refs == 0) {
+ list_add_tail(&matrix->lru, &list->lru);
+ if (list->count > list->max) {
+ matrix = list_first_entry(&list->lru, ec_matrix_t, lru);
+ ec_method_matrix_destroy(list, matrix);
}
- return 0;
}
- return EC_GF_SIZE;
}
-size_t ec_method_encode(size_t size, uint32_t columns, uint32_t row,
- uint8_t * in, uint8_t * out)
+static ec_matrix_t *
+ec_method_matrix_lookup(ec_matrix_list_t *list, uintptr_t mask, uint32_t *pos)
{
- uint32_t i, j;
+ ec_matrix_t *matrix;
+ uint32_t i, j, k;
- size /= EC_METHOD_CHUNK_SIZE * columns;
- row++;
- for (j = 0; j < size; j++)
- {
- ec_gf_muladd[0](out, in, EC_METHOD_WIDTH);
- in += EC_METHOD_CHUNK_SIZE;
- for (i = 1; i < columns; i++)
- {
- ec_gf_muladd[row](out, in, EC_METHOD_WIDTH);
- in += EC_METHOD_CHUNK_SIZE;
+ i = 0;
+ j = list->count;
+ while (i < j) {
+ k = (i + j) >> 1;
+ matrix = list->objects[k];
+ if (matrix->mask == mask) {
+ *pos = k;
+ return matrix;
+ }
+ if (matrix->mask < mask) {
+ i = k + 1;
+ } else {
+ j = k;
}
- out += EC_METHOD_CHUNK_SIZE;
}
+ *pos = i;
- return size * EC_METHOD_CHUNK_SIZE;
+ return NULL;
}
-size_t ec_method_decode(size_t size, uint32_t columns, uint32_t * rows,
- uint8_t ** in, uint8_t * out)
+static void
+ec_method_matrix_remove(ec_matrix_list_t *list, uintptr_t mask)
{
- uint32_t i, j, k, off, last, value;
- uint32_t f;
- uint8_t inv[EC_METHOD_MAX_FRAGMENTS][EC_METHOD_MAX_FRAGMENTS + 1];
- uint8_t mtx[EC_METHOD_MAX_FRAGMENTS][EC_METHOD_MAX_FRAGMENTS];
- uint8_t dummy[EC_METHOD_CHUNK_SIZE];
+ uint32_t pos;
- size /= EC_METHOD_CHUNK_SIZE;
+ if (ec_method_matrix_lookup(list, mask, &pos) != NULL) {
+ list->count--;
+ if (pos < list->count) {
+ memmove(list->objects + pos, list->objects + pos + 1,
+ sizeof(ec_matrix_t *) * (list->count - pos));
+ }
+ }
+}
+
+static void
+ec_method_matrix_insert(ec_matrix_list_t *list, ec_matrix_t *matrix)
+{
+ uint32_t pos;
- memset(inv, 0, sizeof(inv));
- memset(mtx, 0, sizeof(mtx));
- memset(dummy, 0, sizeof(dummy));
- for (i = 0; i < columns; i++)
- {
- inv[i][i] = 1;
- inv[i][columns] = 1;
+ GF_ASSERT(ec_method_matrix_lookup(list, matrix->mask, &pos) == NULL);
+
+ if (pos < list->count) {
+ memmove(list->objects + pos + 1, list->objects + pos,
+ sizeof(ec_matrix_t *) * (list->count - pos));
}
- for (i = 0; i < columns; i++)
- {
- mtx[i][columns - 1] = 1;
- for (j = columns - 1; j > 0; j--)
- {
- mtx[i][j - 1] = ec_method_mul(mtx[i][j], rows[i] + 1);
- }
+ list->objects[pos] = matrix;
+ list->count++;
+}
+
+static ec_matrix_t *
+ec_method_matrix_get(ec_matrix_list_t *list, uintptr_t mask, uint32_t *rows)
+{
+ ec_matrix_t *matrix;
+ uint32_t pos;
+
+ LOCK(&list->lock);
+
+ matrix = ec_method_matrix_lookup(list, mask, &pos);
+ if (matrix != NULL) {
+ list_del_init(&matrix->lru);
+ matrix->refs++;
+
+ goto out;
}
- for (i = 0; i < columns; i++)
- {
- f = mtx[i][i];
- for (j = 0; j < columns; j++)
- {
- mtx[i][j] = ec_method_div(mtx[i][j], f);
- inv[i][j] = ec_method_div(inv[i][j], f);
+ if ((list->count >= list->max) && !list_empty(&list->lru)) {
+ matrix = list_first_entry(&list->lru, ec_matrix_t, lru);
+ list_del_init(&matrix->lru);
+
+ ec_method_matrix_remove(list, matrix->mask);
+
+ ec_method_matrix_release(matrix);
+ } else {
+ matrix = mem_get0(list->pool);
+ if (matrix == NULL) {
+ matrix = EC_ERR(ENOMEM);
+ goto out;
}
- for (j = 0; j < columns; j++)
- {
- if (i != j)
- {
- f = mtx[j][i];
- for (k = 0; k < columns; k++)
- {
- mtx[j][k] ^= ec_method_mul(mtx[i][k], f);
- inv[j][k] ^= ec_method_mul(inv[i][k], f);
- }
- }
+ matrix->values = (uint32_t *)((uintptr_t)matrix + sizeof(ec_matrix_t) +
+ sizeof(ec_matrix_row_t) * list->columns);
+ }
+
+ ec_method_matrix_init(list, matrix, mask, rows, _gf_true);
+
+ if (list->count < list->max) {
+ ec_method_matrix_insert(list, matrix);
+ } else {
+ matrix->mask = 0;
+ }
+
+out:
+ UNLOCK(&list->lock);
+
+ return matrix;
+}
+
+static void
+ec_method_matrix_put(ec_matrix_list_t *list, ec_matrix_t *matrix)
+{
+ LOCK(&list->lock);
+
+ ec_method_matrix_unref(list, matrix);
+
+ UNLOCK(&list->lock);
+}
+
+static int32_t
+ec_method_setup(xlator_t *xl, ec_matrix_list_t *list, const char *gen)
+{
+ ec_matrix_t *matrix;
+ uint32_t values[list->rows];
+ uint32_t i;
+ int32_t err;
+
+ matrix = GF_MALLOC(sizeof(ec_matrix_t) +
+ sizeof(ec_matrix_row_t) * list->rows +
+ sizeof(uint32_t) * list->columns * list->rows,
+ ec_mt_ec_matrix_t);
+ if (matrix == NULL) {
+ err = -ENOMEM;
+ goto failed;
+ }
+ memset(matrix, 0, sizeof(ec_matrix_t));
+ matrix->values = (uint32_t *)((uintptr_t)matrix + sizeof(ec_matrix_t) +
+ sizeof(ec_matrix_row_t) * list->rows);
+
+ list->code = ec_code_create(list->gf, ec_code_detect(xl, gen));
+ if (EC_IS_ERR(list->code)) {
+ err = EC_GET_ERR(list->code);
+ list->code = NULL;
+ goto failed_matrix;
+ }
+
+ for (i = 0; i < list->rows; i++) {
+ values[i] = i + 1;
+ }
+ ec_method_matrix_init(list, matrix, 0, values, _gf_false);
+
+ list->encode = matrix;
+
+ return 0;
+
+failed_matrix:
+ GF_FREE(matrix);
+failed:
+ return err;
+}
+
+int32_t
+ec_method_init(xlator_t *xl, ec_matrix_list_t *list, uint32_t columns,
+ uint32_t rows, uint32_t max, const char *gen)
+{
+ list->columns = columns;
+ list->rows = rows;
+ list->max = max;
+ list->stripe = EC_METHOD_CHUNK_SIZE * list->columns;
+ INIT_LIST_HEAD(&list->lru);
+ int32_t err;
+
+ list->pool = mem_pool_new_fn(xl->ctx,
+ sizeof(ec_matrix_t) +
+ sizeof(ec_matrix_row_t) * columns +
+ sizeof(uint32_t) * columns * columns,
+ 128, "ec_matrix_t");
+ if (list->pool == NULL) {
+ err = -ENOMEM;
+ goto failed;
+ }
+
+ list->objects = GF_MALLOC(sizeof(ec_matrix_t *) * max, ec_mt_ec_matrix_t);
+ if (list->objects == NULL) {
+ err = -ENOMEM;
+ goto failed_pool;
+ }
+
+ list->gf = ec_gf_prepare(EC_GF_BITS, EC_GF_MOD);
+ if (EC_IS_ERR(list->gf)) {
+ err = EC_GET_ERR(list->gf);
+ goto failed_objects;
+ }
+
+ err = ec_method_setup(xl, list, gen);
+ if (err != 0) {
+ goto failed_gf;
+ }
+
+ LOCK_INIT(&list->lock);
+
+ return 0;
+
+failed_gf:
+ ec_gf_destroy(list->gf);
+failed_objects:
+ GF_FREE(list->objects);
+failed_pool:
+ mem_pool_destroy(list->pool);
+failed:
+ list->pool = NULL;
+ list->objects = NULL;
+ list->gf = NULL;
+
+ return err;
+}
+
+void
+ec_method_fini(ec_matrix_list_t *list)
+{
+ ec_matrix_t *matrix;
+
+ if (list->encode == NULL) {
+ return;
+ }
+
+ while (!list_empty(&list->lru)) {
+ matrix = list_first_entry(&list->lru, ec_matrix_t, lru);
+ ec_method_matrix_destroy(list, matrix);
+ }
+
+ GF_ASSERT(list->count == 0);
+
+ if (list->pool) /*Init was successful*/
+ LOCK_DESTROY(&list->lock);
+
+ ec_method_matrix_release(list->encode);
+ GF_FREE(list->encode);
+
+ ec_code_destroy(list->code);
+ ec_gf_destroy(list->gf);
+ GF_FREE(list->objects);
+
+ if (list->pool)
+ mem_pool_destroy(list->pool);
+}
+
+int32_t
+ec_method_update(xlator_t *xl, ec_matrix_list_t *list, const char *gen)
+{
+ /* TODO: Allow changing code generator */
+
+ return 0;
+}
+
+void
+ec_method_encode(ec_matrix_list_t *list, uint64_t size, void *in, void **out)
+{
+ ec_matrix_t *matrix;
+ uint64_t pos;
+ uint32_t i;
+
+ matrix = list->encode;
+ for (pos = 0; pos < size; pos += list->stripe) {
+ for (i = 0; i < matrix->rows; i++) {
+ matrix->row_data[i].func.linear(
+ out[i], in, pos, matrix->row_data[i].values, list->columns);
+ out[i] += EC_METHOD_CHUNK_SIZE;
}
}
- off = 0;
- for (f = 0; f < size; f++)
- {
- for (i = 0; i < columns; i++)
- {
- last = 0;
- j = 0;
- do
- {
- while (inv[i][j] == 0)
- {
- j++;
- }
- if (j < columns)
- {
- value = ec_method_div(last, inv[i][j]);
- last = inv[i][j];
- ec_gf_muladd[value](out, in[j] + off, EC_METHOD_WIDTH);
- j++;
- }
- } while (j < columns);
- ec_gf_muladd[last](out, dummy, EC_METHOD_WIDTH);
+}
+
+int32_t
+ec_method_decode(ec_matrix_list_t *list, uint64_t size, uintptr_t mask,
+ uint32_t *rows, void **in, void *out)
+{
+ ec_matrix_t *matrix;
+ uint64_t pos;
+ uint32_t i;
+
+ matrix = ec_method_matrix_get(list, mask, rows);
+ if (EC_IS_ERR(matrix)) {
+ return EC_GET_ERR(matrix);
+ }
+ for (pos = 0; pos < size; pos += EC_METHOD_CHUNK_SIZE) {
+ for (i = 0; i < matrix->rows; i++) {
+ matrix->row_data[i].func.interleaved(
+ out, in, pos, matrix->row_data[i].values, list->columns);
out += EC_METHOD_CHUNK_SIZE;
}
- off += EC_METHOD_CHUNK_SIZE;
}
- return size * EC_METHOD_CHUNK_SIZE * columns;
+ ec_method_matrix_put(list, matrix);
+
+ return 0;
}
diff --git a/xlators/cluster/ec/src/ec-method.h b/xlators/cluster/ec/src/ec-method.h
index 29b46e10443..f91233b2f88 100644
--- a/xlators/cluster/ec/src/ec-method.h
+++ b/xlators/cluster/ec/src/ec-method.h
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2012-2014 DataLab, s.l. <http://www.datalab.es>
+ Copyright (c) 2012-2015 DataLab, s.l. <http://www.datalab.es>
This file is part of GlusterFS.
This file is licensed to you under your choice of the GNU Lesser
@@ -11,22 +11,38 @@
#ifndef __EC_METHOD_H__
#define __EC_METHOD_H__
-#include "ec-gf.h"
+#include "ec-types.h"
+#include "ec-galois.h"
+
+#define EC_GF_BITS 8
+#define EC_GF_MOD 0x11D
+
+#define EC_GF_SIZE (1 << EC_GF_BITS)
/* Determines the maximum size of the matrix used to encode/decode data */
#define EC_METHOD_MAX_FRAGMENTS 16
/* Determines the maximum number of usable elements in the Galois Field */
-#define EC_METHOD_MAX_NODES (EC_GF_SIZE - 1)
+#define EC_METHOD_MAX_NODES (EC_GF_SIZE - 1)
#define EC_METHOD_WORD_SIZE 64
#define EC_METHOD_CHUNK_SIZE (EC_METHOD_WORD_SIZE * EC_GF_BITS)
-#define EC_METHOD_WIDTH (EC_METHOD_WORD_SIZE / EC_GF_WORD_SIZE)
-void ec_method_initialize(void);
-size_t ec_method_encode(size_t size, uint32_t columns, uint32_t row,
- uint8_t * in, uint8_t * out);
-size_t ec_method_decode(size_t size, uint32_t columns, uint32_t * rows,
- uint8_t ** in, uint8_t * out);
+int32_t
+ec_method_init(xlator_t *xl, ec_matrix_list_t *list, uint32_t columns,
+ uint32_t rows, uint32_t max, const char *gen);
+
+void
+ec_method_fini(ec_matrix_list_t *list);
+
+int32_t
+ec_method_update(xlator_t *xl, ec_matrix_list_t *list, const char *gen);
+
+void
+ec_method_encode(ec_matrix_list_t *list, uint64_t size, void *in, void **out);
+
+int32_t
+ec_method_decode(ec_matrix_list_t *list, uint64_t size, uintptr_t mask,
+ uint32_t *rows, void **in, void *out);
#endif /* __EC_METHOD_H__ */
diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h
new file mode 100644
index 00000000000..de9b89bb2c9
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-types.h
@@ -0,0 +1,690 @@
+/*
+ Copyright (c) 2015 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __EC_TYPES_H__
+#define __EC_TYPES_H__
+
+#include <glusterfs/xlator.h>
+#include <glusterfs/timer.h>
+#include "libxlator.h"
+#include <glusterfs/atomic.h>
+
+#define EC_GF_MAX_REGS 16
+
+enum _ec_heal_need;
+typedef enum _ec_heal_need ec_heal_need_t;
+
+enum _ec_stripe_part;
+typedef enum _ec_stripe_part ec_stripe_part_t;
+
+enum _ec_read_policy;
+typedef enum _ec_read_policy ec_read_policy_t;
+
+struct _ec_config;
+typedef struct _ec_config ec_config_t;
+
+struct _ec_fd;
+typedef struct _ec_fd ec_fd_t;
+
+struct _ec_fragment_range;
+typedef struct _ec_fragment_range ec_fragment_range_t;
+
+struct _ec_inode;
+typedef struct _ec_inode ec_inode_t;
+
+union _ec_cbk;
+typedef union _ec_cbk ec_cbk_t;
+
+struct _ec_lock;
+typedef struct _ec_lock ec_lock_t;
+
+struct _ec_lock_link;
+typedef struct _ec_lock_link ec_lock_link_t;
+
+struct _ec_fop_data;
+typedef struct _ec_fop_data ec_fop_data_t;
+
+struct _ec_cbk_data;
+typedef struct _ec_cbk_data ec_cbk_data_t;
+
+enum _ec_gf_opcode;
+typedef enum _ec_gf_opcode ec_gf_opcode_t;
+
+struct _ec_gf_op;
+typedef struct _ec_gf_op ec_gf_op_t;
+
+struct _ec_gf_mul;
+typedef struct _ec_gf_mul ec_gf_mul_t;
+
+struct _ec_gf;
+typedef struct _ec_gf ec_gf_t;
+
+struct _ec_code_gen;
+typedef struct _ec_code_gen ec_code_gen_t;
+
+struct _ec_code;
+typedef struct _ec_code ec_code_t;
+
+struct _ec_code_arg;
+typedef struct _ec_code_arg ec_code_arg_t;
+
+struct _ec_code_op;
+typedef struct _ec_code_op ec_code_op_t;
+
+struct _ec_code_builder;
+typedef struct _ec_code_builder ec_code_builder_t;
+
+struct _ec_code_chunk;
+typedef struct _ec_code_chunk ec_code_chunk_t;
+
+struct _ec_stripe;
+typedef struct _ec_stripe ec_stripe_t;
+
+struct _ec_stripe_list;
+typedef struct _ec_stripe_list ec_stripe_list_t;
+
+struct _ec_code_space;
+typedef struct _ec_code_space ec_code_space_t;
+
+typedef void (*ec_code_func_linear_t)(void *dst, void *src, uint64_t offset,
+ uint32_t *values, uint32_t count);
+
+typedef void (*ec_code_func_interleaved_t)(void *dst, void **src,
+ uint64_t offset, uint32_t *values,
+ uint32_t count);
+
+union _ec_code_func;
+typedef union _ec_code_func ec_code_func_t;
+
+struct _ec_matrix_row;
+typedef struct _ec_matrix_row ec_matrix_row_t;
+
+struct _ec_matrix;
+typedef struct _ec_matrix ec_matrix_t;
+
+struct _ec_matrix_list;
+typedef struct _ec_matrix_list ec_matrix_list_t;
+
+struct _ec_heal;
+typedef struct _ec_heal ec_heal_t;
+
+struct _ec_self_heald;
+typedef struct _ec_self_heald ec_self_heald_t;
+
+struct _ec_statistics;
+typedef struct _ec_statistics ec_statistics_t;
+
+struct _ec;
+typedef struct _ec ec_t;
+
+typedef void (*ec_wind_f)(ec_t *, ec_fop_data_t *, int32_t);
+typedef int32_t (*ec_handler_f)(ec_fop_data_t *, int32_t);
+typedef void (*ec_resume_f)(ec_fop_data_t *, int32_t);
+
+enum _ec_read_policy { EC_ROUND_ROBIN, EC_GFID_HASH, EC_READ_POLICY_MAX };
+
+enum _ec_heal_need {
+ EC_HEAL_NONEED,
+ EC_HEAL_MAYBE,
+ EC_HEAL_MUST,
+ EC_HEAL_PURGE_INDEX
+};
+
+enum _ec_stripe_part { EC_STRIPE_HEAD, EC_STRIPE_TAIL };
+
+/* Enumartions to indicate FD status. */
+typedef enum { EC_FD_NOT_OPENED, EC_FD_OPENED, EC_FD_OPENING } ec_fd_status_t;
+
+struct _ec_config {
+ uint32_t version;
+ uint8_t algorithm;
+ uint8_t gf_word_size;
+ uint8_t bricks;
+ uint8_t redundancy;
+ uint32_t chunk_size;
+};
+
+struct _ec_fd {
+ loc_t loc;
+ uintptr_t open;
+ int32_t flags;
+ uint64_t bad_version;
+ ec_fd_status_t fd_status[0];
+};
+
+struct _ec_stripe {
+ struct list_head lru; /* LRU list member */
+ uint64_t frag_offset; /* Fragment offset of this stripe */
+ char data[]; /* Contents of the stripe */
+};
+
+struct _ec_stripe_list {
+ struct list_head lru;
+ uint32_t count;
+ uint32_t max;
+};
+
+struct _ec_inode {
+ ec_lock_t *inode_lock;
+ gf_boolean_t have_info;
+ gf_boolean_t have_config;
+ gf_boolean_t have_version;
+ gf_boolean_t have_size;
+ int32_t heal_count;
+ ec_config_t config;
+ uint64_t pre_version[2];
+ uint64_t post_version[2];
+ uint64_t pre_size;
+ uint64_t post_size;
+ uint64_t dirty[2];
+ struct list_head heal;
+ ec_stripe_list_t stripe_cache;
+ uint64_t bad_version;
+};
+
+typedef int32_t (*fop_heal_cbk_t)(call_frame_t *, void *, xlator_t *, int32_t,
+ int32_t, uintptr_t, uintptr_t, uintptr_t,
+ uint32_t, dict_t *);
+typedef int32_t (*fop_fheal_cbk_t)(call_frame_t *, void *, xlator_t *, int32_t,
+ int32_t, uintptr_t, uintptr_t, uintptr_t,
+ uint32_t, dict_t *);
+
+union _ec_cbk {
+ fop_access_cbk_t access;
+ fop_create_cbk_t create;
+ fop_discard_cbk_t discard;
+ fop_entrylk_cbk_t entrylk;
+ fop_fentrylk_cbk_t fentrylk;
+ fop_fallocate_cbk_t fallocate;
+ fop_flush_cbk_t flush;
+ fop_fsync_cbk_t fsync;
+ fop_fsyncdir_cbk_t fsyncdir;
+ fop_getxattr_cbk_t getxattr;
+ fop_fgetxattr_cbk_t fgetxattr;
+ fop_heal_cbk_t heal;
+ fop_fheal_cbk_t fheal;
+ fop_inodelk_cbk_t inodelk;
+ fop_finodelk_cbk_t finodelk;
+ fop_link_cbk_t link;
+ fop_lk_cbk_t lk;
+ fop_lookup_cbk_t lookup;
+ fop_mkdir_cbk_t mkdir;
+ fop_mknod_cbk_t mknod;
+ fop_open_cbk_t open;
+ fop_opendir_cbk_t opendir;
+ fop_readdir_cbk_t readdir;
+ fop_readdirp_cbk_t readdirp;
+ fop_readlink_cbk_t readlink;
+ fop_readv_cbk_t readv;
+ fop_removexattr_cbk_t removexattr;
+ fop_fremovexattr_cbk_t fremovexattr;
+ fop_rename_cbk_t rename;
+ fop_rmdir_cbk_t rmdir;
+ fop_setattr_cbk_t setattr;
+ fop_fsetattr_cbk_t fsetattr;
+ fop_setxattr_cbk_t setxattr;
+ fop_fsetxattr_cbk_t fsetxattr;
+ fop_stat_cbk_t stat;
+ fop_fstat_cbk_t fstat;
+ fop_statfs_cbk_t statfs;
+ fop_symlink_cbk_t symlink;
+ fop_truncate_cbk_t truncate;
+ fop_ftruncate_cbk_t ftruncate;
+ fop_unlink_cbk_t unlink;
+ fop_writev_cbk_t writev;
+ fop_xattrop_cbk_t xattrop;
+ fop_fxattrop_cbk_t fxattrop;
+ fop_zerofill_cbk_t zerofill;
+ fop_seek_cbk_t seek;
+ fop_ipc_cbk_t ipc;
+};
+
+struct _ec_lock {
+ ec_inode_t *ctx;
+ gf_timer_t *timer;
+
+ /* List of owners of this lock. All fops added to this list are running
+ * concurrently. */
+ struct list_head owners;
+
+ /* List of fops waiting to be an owner of the lock. Fops are added to this
+ * list when the current owner has an incompatible access (conflicting lock)
+ * or the lock is not acquired yet. */
+ struct list_head waiting;
+
+ /* List of fops that will wait until the next unlock/lock cycle. This
+ * happens when the currently acquired lock is decided to be released as
+ * soon as possible. In this case, all frozen fops will be continued only
+ * after the lock is reacquired. */
+ struct list_head frozen;
+
+ uintptr_t mask;
+ uintptr_t good_mask;
+ uintptr_t healing;
+ uint32_t refs_owners; /* Refs for fops owning the lock */
+ uint32_t refs_pending; /* Refs assigned to fops being prepared */
+ uint32_t waiting_flags; /*Track xattrop/dirty marking*/
+ gf_boolean_t acquired;
+ gf_boolean_t contention;
+ gf_boolean_t unlock_now;
+ gf_boolean_t release;
+ gf_boolean_t query;
+ fd_t *fd;
+ loc_t loc;
+ union {
+ entrylk_type type;
+ struct gf_flock flock;
+ };
+};
+
+struct _ec_lock_link {
+ ec_lock_t *lock;
+ ec_fop_data_t *fop;
+ struct list_head owner_list;
+ struct list_head wait_list;
+ gf_boolean_t update[2];
+ gf_boolean_t dirty[2];
+ gf_boolean_t optimistic_changelog;
+ loc_t *base;
+ uint64_t size;
+ uint32_t waiting_flags;
+ off_t fl_start;
+ off_t fl_end;
+};
+
+/* This structure keeps a range of fragment offsets affected by a fop. Since
+ * real file offsets can be difficult to handle correctly because of overflows,
+ * we use the 'scaled' offset, which corresponds to the offset of the fragment
+ * seen by the bricks, which is always smaller and cannot overflow. */
+struct _ec_fragment_range {
+ uint64_t first; /* Address of the first affected fragment as seen by the
+ bricks (offset on brick) */
+ uint64_t last; /* Address of the first non affected fragment as seen by
+ the bricks (offset on brick) */
+};
+
+/* EC xlator data structure to collect all the data required to perform
+ * the file operation.*/
+struct _ec_fop_data {
+ int32_t id; /* ID of the file operation */
+ int32_t refs;
+ int32_t state;
+ uint32_t minimum; /* Minimum number of successful
+ operation required to conclude a
+ fop as successful */
+ int32_t expected;
+ int32_t winds;
+ int32_t jobs;
+ int32_t error;
+ ec_fop_data_t *parent;
+ xlator_t *xl; /* points to EC xlator */
+ call_frame_t *req_frame; /* frame of the calling xlator */
+ call_frame_t *frame; /* frame used by this fop */
+ struct list_head cbk_list; /* sorted list of groups of answers */
+ struct list_head answer_list; /* list of answers */
+ struct list_head pending_list; /* member of ec_t.pending_fops */
+ ec_cbk_data_t *answer; /* accepted answer */
+ int32_t lock_count;
+ int32_t locked;
+ gf_lock_t lock;
+ ec_lock_link_t locks[2];
+ int32_t first_lock;
+
+ uint32_t fop_flags; /* Flags passed by the caller. */
+ uint32_t flags; /* Internal flags. */
+ uint32_t first;
+ uintptr_t mask;
+ uintptr_t healing; /*Dispatch is done but call is successful only
+ if fop->minimum number of subvolumes succeed
+ which are not healing*/
+ uintptr_t remaining;
+ uintptr_t received; /* Mask of responses */
+ uintptr_t good;
+
+ uid_t uid;
+ gid_t gid;
+
+ ec_wind_f wind; /* Function to wind to */
+ ec_handler_f handler; /* FOP manager function */
+ ec_resume_f resume;
+ ec_cbk_t cbks; /* Callback function for this FOP */
+ void *data;
+ ec_heal_t *heal;
+ struct list_head healer;
+
+ uint64_t user_size;
+ uint32_t head;
+
+ int32_t use_fd; /* Indicates whether this FOP uses FD or
+ not */
+
+ dict_t *xdata;
+ dict_t *dict;
+ int32_t int32;
+ uint32_t uint32;
+ uint64_t size;
+ off_t offset;
+ mode_t mode[2];
+ entrylk_cmd entrylk_cmd;
+ entrylk_type entrylk_type;
+ gf_xattrop_flags_t xattrop_flags;
+ dev_t dev;
+ inode_t *inode;
+ fd_t *fd; /* FD of the file on which FOP is
+ being carried upon */
+ struct iatt iatt;
+ char *str[2];
+ loc_t loc[2]; /* Holds the location details for
+ the file */
+ struct gf_flock flock;
+ struct iovec *vector;
+ struct iobref *buffers;
+ gf_seek_what_t seek;
+ ec_fragment_range_t frag_range; /* This will hold the range of stripes
+ affected by the fop. */
+ char *errstr; /*String of fop name, path and gfid
+ to be used in gf_msg. */
+};
+
+struct _ec_cbk_data {
+ struct list_head list; /* item in the sorted list of groups */
+ struct list_head answer_list; /* item in the list of answers */
+ ec_fop_data_t *fop;
+ ec_cbk_data_t *next; /* next answer in the same group */
+ uint32_t idx;
+ int32_t op_ret;
+ int32_t op_errno;
+ int32_t count;
+ uintptr_t mask;
+
+ dict_t *xdata;
+ dict_t *dict;
+ int32_t int32;
+ uintptr_t uintptr[3];
+ uint64_t size;
+ uint64_t version[2];
+ inode_t *inode;
+ fd_t *fd;
+ struct statvfs statvfs;
+ struct iatt iatt[5];
+ struct gf_flock flock;
+ struct iovec *vector;
+ struct iobref *buffers;
+ char *str;
+ gf_dirent_t entries;
+ off_t offset;
+ gf_seek_what_t what;
+};
+
+enum _ec_gf_opcode {
+ EC_GF_OP_LOAD,
+ EC_GF_OP_STORE,
+ EC_GF_OP_COPY,
+ EC_GF_OP_XOR2,
+ EC_GF_OP_XOR3,
+ EC_GF_OP_XORM,
+ EC_GF_OP_END
+};
+
+struct _ec_gf_op {
+ ec_gf_opcode_t op;
+ uint32_t arg1;
+ uint32_t arg2;
+ uint32_t arg3;
+};
+
+struct _ec_gf_mul {
+ uint32_t regs;
+ uint32_t map[EC_GF_MAX_REGS];
+ ec_gf_op_t *ops;
+};
+
+struct _ec_gf {
+ uint32_t bits;
+ uint32_t size;
+ uint32_t mod;
+ uint32_t min_ops;
+ uint32_t max_ops;
+ uint32_t avg_ops;
+ uint32_t *log;
+ uint32_t *pow;
+ ec_gf_mul_t **table;
+};
+
+struct _ec_code_gen {
+ char *name;
+ char **flags;
+ uint32_t width;
+
+ void (*prolog)(ec_code_builder_t *builder);
+ void (*epilog)(ec_code_builder_t *builder);
+ void (*load)(ec_code_builder_t *builder, uint32_t reg, uint32_t offset,
+ uint32_t bit);
+ void (*store)(ec_code_builder_t *builder, uint32_t reg, uint32_t bit);
+ void (*copy)(ec_code_builder_t *builder, uint32_t dst, uint32_t src);
+ void (*xor2)(ec_code_builder_t *builder, uint32_t dst, uint32_t src);
+ void (*xor3)(ec_code_builder_t *builder, uint32_t dst, uint32_t src1,
+ uint32_t src2);
+ void (*xorm)(ec_code_builder_t *builder, uint32_t dst, uint32_t offset,
+ uint32_t bit);
+};
+
+struct _ec_code {
+ gf_lock_t lock;
+ struct list_head spaces;
+ ec_gf_t *gf;
+ ec_code_gen_t *gen;
+};
+
+struct _ec_code_arg {
+ uint32_t value;
+};
+
+struct _ec_code_op {
+ ec_gf_opcode_t op;
+ ec_code_arg_t arg1;
+ ec_code_arg_t arg2;
+ ec_code_arg_t arg3;
+};
+
+struct _ec_code_builder {
+ ec_code_t *code;
+ uint64_t address;
+ uint8_t *data;
+ uint32_t size;
+ int32_t error;
+ uint32_t regs;
+ uint32_t bits;
+ uint32_t width;
+ uint32_t count;
+ uint32_t base;
+ uint32_t map[EC_GF_MAX_REGS];
+ gf_boolean_t linear;
+ uint64_t loop;
+ ec_code_op_t ops[0];
+};
+
+struct _ec_code_chunk {
+ struct list_head list;
+ size_t size;
+ ec_code_space_t *space;
+};
+
+struct _ec_code_space {
+ struct list_head list;
+ struct list_head chunks;
+ ec_code_t *code;
+ void *exec;
+ size_t size;
+};
+
+union _ec_code_func {
+ ec_code_func_linear_t linear;
+ ec_code_func_interleaved_t interleaved;
+};
+
+struct _ec_matrix_row {
+ ec_code_func_t func;
+ uint32_t *values;
+};
+
+struct _ec_matrix {
+ struct list_head lru;
+ uint32_t refs;
+ uint32_t columns;
+ uint32_t rows;
+ uintptr_t mask;
+ ec_code_t *code;
+ uint32_t *values;
+ ec_matrix_row_t row_data[0];
+};
+
+struct _ec_matrix_list {
+ struct list_head lru;
+ gf_lock_t lock;
+ uint32_t columns;
+ uint32_t rows;
+ uint32_t max;
+ uint32_t count;
+ uint32_t stripe;
+ struct mem_pool *pool;
+ ec_gf_t *gf;
+ ec_code_t *code;
+ ec_matrix_t *encode;
+ ec_matrix_t **objects;
+};
+
+struct _ec_heal {
+ struct list_head list;
+ gf_lock_t lock;
+ xlator_t *xl;
+ ec_fop_data_t *fop;
+ void *data;
+ ec_fop_data_t *lookup;
+ loc_t loc;
+ struct iatt iatt;
+ char *symlink;
+ fd_t *fd;
+ int32_t partial;
+ int32_t done;
+ int32_t error;
+ gf_boolean_t nameheal;
+ uintptr_t available;
+ uintptr_t good;
+ uintptr_t bad;
+ uintptr_t open;
+ uintptr_t fixed;
+ uint64_t offset;
+ uint64_t size;
+ uint64_t total_size;
+ uint64_t version[2];
+ uint64_t raw_size;
+};
+
+struct subvol_healer {
+ xlator_t *this;
+ int subvol;
+ gf_boolean_t running;
+ gf_boolean_t rerun;
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+ pthread_t thread;
+};
+
+struct _ec_self_heald {
+ gf_boolean_t iamshd;
+ gf_boolean_t enabled;
+ int timeout;
+ uint32_t max_threads;
+ uint32_t wait_qlength;
+ struct subvol_healer *index_healers;
+ struct subvol_healer *full_healers;
+};
+
+struct _ec_statistics {
+ struct {
+ gf_atomic_t hits; /* Cache hits. */
+ gf_atomic_t misses; /* Cache misses. */
+ gf_atomic_t updates; /* Number of times an existing stripe has
+ been updated with new content. */
+ gf_atomic_t invals; /* Number of times an existing stripe has
+ been invalidated because of truncates
+ or discards. */
+ gf_atomic_t evicts; /* Number of times that an existing entry
+ has been evicted to make room for newer
+ entries. */
+ gf_atomic_t allocs; /* Number of memory allocations made to
+ store stripes. */
+ gf_atomic_t errors; /* Number of errors that have caused extra
+ requests. (Basically memory allocation
+ errors). */
+ } stripe_cache;
+ struct {
+ gf_atomic_t attempted; /*Number of heals attempted on
+ files/directories*/
+ gf_atomic_t completed; /*Number of heals complted on files/directories*/
+ } shd;
+};
+
+struct _ec {
+ xlator_t *xl;
+ int32_t healers;
+ int32_t heal_waiters;
+ int32_t nodes; /* Total number of bricks(n) */
+ int32_t bits_for_nodes;
+ int32_t fragments; /* Data bricks(k) */
+ int32_t redundancy; /* Redundant bricks(m) */
+ uint32_t fragment_size; /* Size of fragment/chunk on a
+ brick. */
+ uint32_t stripe_size; /* (fragment_size * fragments)
+ maximum size of user data
+ stored in one stripe. */
+ int32_t up; /* Represents whether EC volume is
+ up or not. */
+ uint32_t idx;
+ uint32_t xl_up_count; /* Number of UP bricks. */
+ uintptr_t xl_up; /* Bit flag representing UP
+ bricks */
+ uint32_t xl_notify_count; /* Number of notifications. */
+ uintptr_t xl_notify; /* Bit flag representing
+ notification for bricks. */
+ uintptr_t node_mask;
+ uintptr_t read_mask; /*Stores user defined read-mask*/
+ gf_atomic_t async_fop_count; /* Number of on going asynchronous fops. */
+ xlator_t **xl_list;
+ gf_lock_t lock;
+ gf_timer_t *timer;
+ gf_boolean_t shutdown;
+ gf_boolean_t eager_lock;
+ gf_boolean_t other_eager_lock;
+ gf_boolean_t optimistic_changelog;
+ gf_boolean_t parallel_writes;
+ uint32_t stripe_cache;
+ uint32_t quorum_count;
+ uint32_t background_heals;
+ uint32_t heal_wait_qlen;
+ uint32_t self_heal_window_size; /* max size of read/writes */
+ uint32_t eager_lock_timeout;
+ uint32_t other_eager_lock_timeout;
+ struct list_head pending_fops;
+ struct list_head heal_waiting;
+ struct list_head healing;
+ struct mem_pool *fop_pool;
+ struct mem_pool *cbk_pool;
+ struct mem_pool *lock_pool;
+ ec_self_heald_t shd;
+ char vol_uuid[UUID_SIZE + 1];
+ dict_t *leaf_to_subvolid;
+ ec_read_policy_t read_policy;
+ ec_matrix_list_t matrix;
+ ec_statistics_t stats;
+};
+
+#endif /* __EC_TYPES_H__ */
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
index 5476ac6562e..7344be4968d 100644
--- a/xlators/cluster/ec/src/ec.c
+++ b/xlators/cluster/ec/src/ec.c
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2012-2014 DataLab, s.l. <http://www.datalab.es>
+ Copyright (c) 2012-2015 DataLab, s.l. <http://www.datalab.es>
This file is part of GlusterFS.
This file is licensed to you under your choice of the GNU Lesser
@@ -8,59 +8,63 @@
cases as published by the Free Software Foundation.
*/
-#include "defaults.h"
-#include "statedump.h"
-#include "compat-errno.h"
+#include <glusterfs/defaults.h>
+#include <glusterfs/statedump.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/upcall-utils.h>
+#include "ec.h"
+#include "ec-messages.h"
#include "ec-mem-types.h"
+#include "ec-types.h"
#include "ec-helpers.h"
#include "ec-common.h"
#include "ec-fops.h"
#include "ec-method.h"
-#include "ec.h"
+#include "ec-code.h"
#include "ec-heald.h"
+#include <glusterfs/events.h>
-#define EC_MAX_FRAGMENTS EC_METHOD_MAX_FRAGMENTS
-/* The maximum number of nodes is derived from the maximum allowed fragments
- * using the rule that redundancy cannot be equal or greater than the number
- * of fragments.
- */
-#define EC_MAX_NODES min(EC_MAX_FRAGMENTS * 2 - 1, EC_METHOD_MAX_NODES)
+static char *ec_read_policies[EC_READ_POLICY_MAX + 1] = {
+ [EC_ROUND_ROBIN] = "round-robin",
+ [EC_GFID_HASH] = "gfid-hash",
+ [EC_READ_POLICY_MAX] = NULL};
#define EC_INTERNAL_XATTR_OR_GOTO(name, xattr, op_errno, label) \
- do { \
- if (ec_is_internal_xattr (NULL, (char *)name, NULL, NULL)) { \
- op_errno = EPERM; \
- goto label; \
- } \
- if (name && (strlen (name) == 0) && xattr) { \
- /* Bulk [f]removexattr/[f]setxattr */ \
- GF_IF_INTERNAL_XATTR_GOTO (EC_XATTR_PREFIX"*", xattr, \
- op_errno, label); \
- } \
- } while (0)
-
-int32_t ec_parse_options(xlator_t * this)
-{
- ec_t * ec = this->private;
+ do { \
+ if (ec_is_internal_xattr(NULL, (char *)name, NULL, NULL)) { \
+ op_errno = EPERM; \
+ goto label; \
+ } \
+ if (name && (strlen(name) == 0) && xattr) { \
+ /* Bulk [f]removexattr/[f]setxattr */ \
+ GF_IF_INTERNAL_XATTR_GOTO(EC_XATTR_PREFIX "*", xattr, op_errno, \
+ label); \
+ } \
+ } while (0)
+
+int32_t
+ec_parse_options(xlator_t *this)
+{
+ ec_t *ec = this->private;
int32_t error = EINVAL;
uintptr_t mask;
GF_OPTION_INIT("redundancy", ec->redundancy, int32, out);
ec->fragments = ec->nodes - ec->redundancy;
if ((ec->redundancy < 1) || (ec->redundancy >= ec->fragments) ||
- (ec->fragments > EC_MAX_FRAGMENTS))
- {
- gf_log(this->name, GF_LOG_ERROR, "Invalid redundancy (must be between "
- "1 and %d)", (ec->nodes - 1) / 2);
+ (ec->fragments > EC_MAX_FRAGMENTS)) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, EC_MSG_INVALID_REDUNDANCY,
+ "Invalid redundancy (must be between "
+ "1 and %d)",
+ (ec->nodes - 1) / 2);
goto out;
}
ec->bits_for_nodes = 1;
mask = 2;
- while (ec->nodes > mask)
- {
+ while (ec->nodes > mask) {
ec->bits_for_nodes++;
mask <<= 1;
}
@@ -68,9 +72,10 @@ int32_t ec_parse_options(xlator_t * this)
ec->fragment_size = EC_METHOD_CHUNK_SIZE;
ec->stripe_size = ec->fragment_size * ec->fragments;
- gf_log("ec", GF_LOG_DEBUG, "Initialized with: nodes=%u, fragments=%u, "
- "stripe_size=%u, node_mask=%lX",
- ec->nodes, ec->fragments, ec->stripe_size, ec->node_mask);
+ gf_msg_debug("ec", 0,
+ "Initialized with: nodes=%u, fragments=%u, "
+ "stripe_size=%u, node_mask=%" PRIxFAST32,
+ ec->nodes, ec->fragments, ec->stripe_size, ec->node_mask);
error = 0;
@@ -78,28 +83,28 @@ out:
return error;
}
-int32_t ec_prepare_childs(xlator_t * this)
+int32_t
+ec_prepare_childs(xlator_t *this)
{
- ec_t * ec = this->private;
- xlator_list_t * child = NULL;
+ ec_t *ec = this->private;
+ xlator_list_t *child = NULL;
int32_t count = 0;
- for (child = this->children; child != NULL; child = child->next)
- {
+ for (child = this->children; child != NULL; child = child->next) {
count++;
}
- if (count > EC_MAX_NODES)
- {
- gf_log(this->name, GF_LOG_ERROR, "Too many subvolumes");
+ if (count > EC_MAX_NODES) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, EC_MSG_TOO_MANY_SUBVOLS,
+ "Too many subvolumes");
return EINVAL;
}
ec->nodes = count;
ec->xl_list = GF_CALLOC(count, sizeof(ec->xl_list[0]), ec_mt_xlator_t);
- if (ec->xl_list == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Allocation of xlator list failed");
+ if (ec->xl_list == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY,
+ "Allocation of xlator list failed");
return ENOMEM;
}
@@ -107,8 +112,7 @@ int32_t ec_prepare_childs(xlator_t * this)
ec->xl_up_count = 0;
count = 0;
- for (child = this->children; child != NULL; child = child->next)
- {
+ for (child = this->children; child != NULL; child = child->next) {
ec->xl_list[count++] = child->xlator;
}
@@ -117,43 +121,42 @@ int32_t ec_prepare_childs(xlator_t * this)
/* This function transforms the subvol to subvol-id*/
static int
-_subvol_to_subvolid (dict_t *this, char *key, data_t *value, void *data)
-{
- ec_t *ec = data;
- xlator_t *subvol = NULL;
- int i = 0;
- int ret = -1;
-
- subvol = data_to_ptr (value);
- for (i = 0; i < ec->nodes; i++) {
- if (ec->xl_list[i] == subvol) {
- ret = dict_set_int32 (this, key, i);
- /* -1 stops dict_foreach and returns -1*/
- if (ret < 0)
- ret = -1;
- goto out;
- }
+_subvol_to_subvolid(dict_t *this, char *key, data_t *value, void *data)
+{
+ ec_t *ec = data;
+ xlator_t *subvol = NULL;
+ int i = 0;
+ int ret = -1;
+
+ subvol = data_to_ptr(value);
+ for (i = 0; i < ec->nodes; i++) {
+ if (ec->xl_list[i] == subvol) {
+ ret = dict_set_int32(this, key, i);
+ /* -1 stops dict_foreach and returns -1*/
+ if (ret < 0)
+ ret = -1;
+ goto out;
}
+ }
out:
- return ret;
+ return ret;
}
int
-ec_subvol_to_subvol_id_transform (ec_t *ec, dict_t *leaf_to_subvolid)
+ec_subvol_to_subvol_id_transform(ec_t *ec, dict_t *leaf_to_subvolid)
{
- return dict_foreach (leaf_to_subvolid, _subvol_to_subvolid, ec);
+ return dict_foreach(leaf_to_subvolid, _subvol_to_subvolid, ec);
}
-void __ec_destroy_private(xlator_t * this)
+void
+__ec_destroy_private(xlator_t *this)
{
- ec_t * ec = this->private;
+ ec_t *ec = this->private;
- if (ec != NULL)
- {
+ if (ec != NULL) {
LOCK(&ec->lock);
- if (ec->timer != NULL)
- {
+ if (ec->timer != NULL) {
gf_timer_call_cancel(this->ctx, ec->timer);
ec->timer = NULL;
}
@@ -171,41 +174,41 @@ void __ec_destroy_private(xlator_t * this)
sleep(2);
this->private = NULL;
- if (ec->xl_list != NULL)
- {
+ if (ec->xl_list != NULL) {
GF_FREE(ec->xl_list);
ec->xl_list = NULL;
}
- if (ec->fop_pool != NULL)
- {
+ if (ec->fop_pool != NULL) {
mem_pool_destroy(ec->fop_pool);
}
- if (ec->cbk_pool != NULL)
- {
+ if (ec->cbk_pool != NULL) {
mem_pool_destroy(ec->cbk_pool);
}
- if (ec->lock_pool != NULL)
- {
+ if (ec->lock_pool != NULL) {
mem_pool_destroy(ec->lock_pool);
}
LOCK_DESTROY(&ec->lock);
if (ec->leaf_to_subvolid)
- dict_unref (ec->leaf_to_subvolid);
+ dict_unref(ec->leaf_to_subvolid);
+
+ ec_method_fini(&ec->matrix);
+
GF_FREE(ec);
}
}
-int32_t mem_acct_init(xlator_t * this)
+int32_t
+mem_acct_init(xlator_t *this)
{
- if (xlator_mem_acct_init(this, ec_mt_end + 1) != 0)
- {
- gf_log(this->name, GF_LOG_ERROR, "Memory accounting initialization "
- "failed.");
+ if (xlator_mem_acct_init(this, ec_mt_end + 1) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY,
+ "Memory accounting initialization "
+ "failed.");
return -1;
}
@@ -213,261 +216,583 @@ int32_t mem_acct_init(xlator_t * this)
return 0;
}
-int32_t
-reconfigure (xlator_t *this, dict_t *options)
+void
+ec_configure_background_heal_opts(ec_t *ec, int background_heals,
+ int heal_wait_qlen)
{
- ec_t *ec = this->private;
+ if (background_heals == 0) {
+ ec->heal_wait_qlen = 0;
+ } else {
+ ec->heal_wait_qlen = heal_wait_qlen;
+ }
+ ec->background_heals = background_heals;
+}
- GF_OPTION_RECONF ("self-heal-daemon", ec->shd.enabled, options, bool, failed);
- GF_OPTION_RECONF ("iam-self-heal-daemon", ec->shd.iamshd, options,
- bool, failed);
+int
+ec_assign_read_policy(ec_t *ec, char *read_policy)
+{
+ int read_policy_idx = -1;
- return 0;
-failed:
+ read_policy_idx = gf_get_index_by_elem(ec_read_policies, read_policy);
+ if (read_policy_idx < 0 || read_policy_idx >= EC_READ_POLICY_MAX)
return -1;
+
+ ec->read_policy = read_policy_idx;
+ return 0;
}
-glusterfs_event_t
-ec_get_event_from_state (ec_t *ec)
-{
- int down_count = 0;
+int32_t
+reconfigure(xlator_t *this, dict_t *options)
+{
+ ec_t *ec = this->private;
+ char *read_policy = NULL;
+ char *extensions = NULL;
+ uint32_t heal_wait_qlen = 0;
+ uint32_t background_heals = 0;
+ int32_t ret = -1;
+ int32_t err;
+
+ GF_OPTION_RECONF("cpu-extensions", extensions, options, str, failed);
+
+ GF_OPTION_RECONF("self-heal-daemon", ec->shd.enabled, options, bool,
+ failed);
+ GF_OPTION_RECONF("iam-self-heal-daemon", ec->shd.iamshd, options, bool,
+ failed);
+ GF_OPTION_RECONF("eager-lock", ec->eager_lock, options, bool, failed);
+ GF_OPTION_RECONF("other-eager-lock", ec->other_eager_lock, options, bool,
+ failed);
+ GF_OPTION_RECONF("eager-lock-timeout", ec->eager_lock_timeout, options,
+ uint32, failed);
+ GF_OPTION_RECONF("other-eager-lock-timeout", ec->other_eager_lock_timeout,
+ options, uint32, failed);
+ GF_OPTION_RECONF("background-heals", background_heals, options, uint32,
+ failed);
+ GF_OPTION_RECONF("heal-wait-qlength", heal_wait_qlen, options, uint32,
+ failed);
+ GF_OPTION_RECONF("self-heal-window-size", ec->self_heal_window_size,
+ options, uint32, failed);
+ GF_OPTION_RECONF("heal-timeout", ec->shd.timeout, options, int32, failed);
+ ec_configure_background_heal_opts(ec, background_heals, heal_wait_qlen);
+ GF_OPTION_RECONF("shd-max-threads", ec->shd.max_threads, options, uint32,
+ failed);
+ GF_OPTION_RECONF("shd-wait-qlength", ec->shd.wait_qlength, options, uint32,
+ failed);
+
+ GF_OPTION_RECONF("read-policy", read_policy, options, str, failed);
+
+ GF_OPTION_RECONF("optimistic-change-log", ec->optimistic_changelog, options,
+ bool, failed);
+ GF_OPTION_RECONF("parallel-writes", ec->parallel_writes, options, bool,
+ failed);
+ GF_OPTION_RECONF("stripe-cache", ec->stripe_cache, options, uint32, failed);
+ GF_OPTION_RECONF("quorum-count", ec->quorum_count, options, uint32, failed);
+ ret = 0;
+ if (ec_assign_read_policy(ec, read_policy)) {
+ ret = -1;
+ }
- if (ec->xl_up_count >= ec->fragments) {
- /* If ec is up but some subvolumes are yet to notify, give
- * grace time for other subvols to notify to prevent start of
- * I/O which may result in self-heals */
- if (ec->timer && ec->xl_notify_count < ec->nodes)
- return GF_EVENT_MAXVAL;
+ err = ec_method_update(this, &ec->matrix, extensions);
+ if (err != 0) {
+ ret = -1;
+ }
- return GF_EVENT_CHILD_UP;
- } else {
- down_count = ec->xl_notify_count - ec->xl_up_count;
- if (down_count > ec->redundancy)
- return GF_EVENT_CHILD_DOWN;
- }
+failed:
+ return ret;
+}
+
+glusterfs_event_t
+ec_get_event_from_state(ec_t *ec)
+{
+ int down_count = 0;
+
+ if (ec->xl_up_count >= ec->fragments) {
+ /* If ec is up but some subvolumes are yet to notify, give
+ * grace time for other subvols to notify to prevent start of
+ * I/O which may result in self-heals */
+ if (ec->xl_notify_count < ec->nodes)
+ return GF_EVENT_MAXVAL;
+
+ return GF_EVENT_CHILD_UP;
+ } else {
+ down_count = ec->xl_notify_count - ec->xl_up_count;
+ if (down_count > ec->redundancy)
+ return GF_EVENT_CHILD_DOWN;
+ }
- return GF_EVENT_MAXVAL;
+ return GF_EVENT_MAXVAL;
}
void
-ec_up (xlator_t *this, ec_t *ec)
+ec_up(xlator_t *this, ec_t *ec)
{
- if (ec->timer != NULL) {
- gf_timer_call_cancel (this->ctx, ec->timer);
- ec->timer = NULL;
- }
+ char str1[32], str2[32];
- ec->up = 1;
- gf_log(this->name, GF_LOG_INFO, "Going UP");
+ if (ec->timer != NULL) {
+ gf_timer_call_cancel(this->ctx, ec->timer);
+ ec->timer = NULL;
+ }
+
+ ec->up = 1;
+ gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_EC_UP,
+ "Going UP : Child UP = %s Child Notify = %s",
+ ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
+ ec_bin(str2, sizeof(str2), ec->xl_notify, ec->nodes));
+
+ gf_event(EVENT_EC_MIN_BRICKS_UP, "subvol=%s", this->name);
}
void
-ec_down (xlator_t *this, ec_t *ec)
+ec_down(xlator_t *this, ec_t *ec)
{
- if (ec->timer != NULL) {
- gf_timer_call_cancel(this->ctx, ec->timer);
- ec->timer = NULL;
- }
+ char str1[32], str2[32];
- ec->up = 0;
- gf_log(this->name, GF_LOG_INFO, "Going DOWN");
+ if (ec->timer != NULL) {
+ gf_timer_call_cancel(this->ctx, ec->timer);
+ ec->timer = NULL;
+ }
+
+ ec->up = 0;
+ gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_EC_DOWN,
+ "Going DOWN : Child UP = %s Child Notify = %s",
+ ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
+ ec_bin(str2, sizeof(str2), ec->xl_notify, ec->nodes));
+
+ gf_event(EVENT_EC_MIN_BRICKS_NOT_UP, "subvol=%s", this->name);
}
void
-ec_notify_cbk (void *data)
+ec_notify_cbk(void *data)
{
- ec_t *ec = data;
- glusterfs_event_t event = GF_EVENT_MAXVAL;
+ ec_t *ec = data;
+ glusterfs_event_t event = GF_EVENT_MAXVAL;
+ gf_boolean_t propagate = _gf_false;
+ gf_boolean_t launch_heal = _gf_false;
- LOCK(&ec->lock);
- {
- if (!ec->timer) {
- /*
- * Either child_up/child_down is already sent to parent
- * This is a spurious wake up.
- */
- goto unlock;
- }
-
- gf_timer_call_cancel (ec->xl->ctx, ec->timer);
- ec->timer = NULL;
-
- event = ec_get_event_from_state (ec);
- /* If event is still MAXVAL then enough subvolumes didn't
- * notify, treat it as CHILD_DOWN. */
- if (event == GF_EVENT_MAXVAL) {
- event = GF_EVENT_CHILD_DOWN;
- ec->xl_notify = (1ULL << ec->nodes) - 1ULL;
- ec->xl_notify_count = ec->nodes;
- } else if (event == GF_EVENT_CHILD_UP) {
- /* Rest of the bricks are still not coming up,
- * notify that ec is up. Files/directories will be
- * healed as in when they come up. */
- ec_up (ec->xl, ec);
- }
-
- /* CHILD_DOWN should not come here as no grace period is given
- * for notifying CHILD_DOWN. */
-
- default_notify (ec->xl, event, NULL);
+ LOCK(&ec->lock);
+ {
+ if (!ec->timer) {
+ /*
+ * Either child_up/child_down is already sent to parent
+ * This is a spurious wake up.
+ */
+ goto unlock;
+ }
+
+ gf_timer_call_cancel(ec->xl->ctx, ec->timer);
+ ec->timer = NULL;
+
+ /* The timeout has expired, so any subvolume that has not
+ * already reported its state, will be considered to be down.
+ * We mark as if all bricks had reported. */
+ ec->xl_notify = (1ULL << ec->nodes) - 1ULL;
+ ec->xl_notify_count = ec->nodes;
+
+ /* Since we have marked all subvolumes as notified, it's
+ * guaranteed that ec_get_event_from_state() will return
+ * CHILD_UP or CHILD_DOWN, but not MAXVAL. */
+ event = ec_get_event_from_state(ec);
+ if (event == GF_EVENT_CHILD_UP) {
+ /* We are ready to bring the volume up. If there are
+ * still bricks DOWN, they will be healed when they
+ * come up. */
+ ec_up(ec->xl, ec);
+
+ if (ec->shd.iamshd && !ec->shutdown) {
+ launch_heal = _gf_true;
+ GF_ATOMIC_INC(ec->async_fop_count);
+ }
}
+
+ propagate = _gf_true;
+ }
unlock:
- UNLOCK(&ec->lock);
+ UNLOCK(&ec->lock);
+
+ if (launch_heal) {
+ /* We have just brought the volume UP, so we trigger
+ * a self-heal check on the root directory. */
+ ec_launch_replace_heal(ec);
+ }
+ if (propagate) {
+ default_notify(ec->xl, event, NULL);
+ }
}
void
-ec_launch_notify_timer (xlator_t *this, ec_t *ec)
+ec_launch_notify_timer(xlator_t *this, ec_t *ec)
+{
+ struct timespec delay = {
+ 0,
+ };
+
+ gf_msg_debug(this->name, 0, "Initiating child-down timer");
+ delay.tv_sec = 10;
+ delay.tv_nsec = 0;
+ ec->timer = gf_timer_call_after(this->ctx, delay, ec_notify_cbk, ec);
+ if (ec->timer == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_TIMER_CREATE_FAIL,
+ "Cannot create timer "
+ "for delayed initialization");
+ }
+}
+
+gf_boolean_t
+ec_disable_delays(ec_t *ec)
{
- struct timespec delay = {0, };
+ ec->shutdown = _gf_true;
- gf_log (this->name, GF_LOG_DEBUG, "Initiating child-down timer");
- delay.tv_sec = 10;
- delay.tv_nsec = 0;
- ec->timer = gf_timer_call_after (this->ctx, delay, ec_notify_cbk, ec);
- if (ec->timer == NULL) {
- gf_log(this->name, GF_LOG_ERROR, "Cannot create timer "
- "for delayed initialization");
- }
+ return __ec_is_last_fop(ec);
}
void
-ec_handle_up (xlator_t *this, ec_t *ec, int32_t idx)
+ec_cleanup_healer_object(ec_t *ec)
{
- if (((ec->xl_notify >> idx) & 1) == 0) {
- ec->xl_notify |= 1ULL << idx;
- ec->xl_notify_count++;
+ struct subvol_healer *healer = NULL;
+ ec_self_heald_t *shd = NULL;
+ void *res = NULL;
+ int i = 0;
+ gf_boolean_t is_join = _gf_false;
+
+ shd = &ec->shd;
+ if (!shd->iamshd)
+ return;
+
+ for (i = 0; i < ec->nodes; i++) {
+ healer = &shd->index_healers[i];
+ pthread_mutex_lock(&healer->mutex);
+ {
+ healer->rerun = 1;
+ if (healer->running) {
+ pthread_cond_signal(&healer->cond);
+ is_join = _gf_true;
+ }
+ }
+ pthread_mutex_unlock(&healer->mutex);
+ if (is_join) {
+ pthread_join(healer->thread, &res);
+ is_join = _gf_false;
}
- if (((ec->xl_up >> idx) & 1) == 0) { /* Duplicate event */
- ec->xl_up |= 1ULL << idx;
- ec->xl_up_count++;
+ healer = &shd->full_healers[i];
+ pthread_mutex_lock(&healer->mutex);
+ {
+ healer->rerun = 1;
+ if (healer->running) {
+ pthread_cond_signal(&healer->cond);
+ is_join = _gf_true;
+ }
+ }
+ pthread_mutex_unlock(&healer->mutex);
+ if (is_join) {
+ pthread_join(healer->thread, &res);
+ is_join = _gf_false;
}
+ }
}
-
void
-ec_handle_down (xlator_t *this, ec_t *ec, int32_t idx)
+ec_pending_fops_completed(ec_t *ec)
{
- if (((ec->xl_notify >> idx) & 1) == 0) {
- ec->xl_notify |= 1ULL << idx;
- ec->xl_notify_count++;
- }
+ if (ec->shutdown) {
+ default_notify(ec->xl, GF_EVENT_PARENT_DOWN, NULL);
+ }
+}
- if (((ec->xl_up >> idx) & 1) != 0) { /* Duplicate event */
- gf_log(this->name, GF_LOG_DEBUG, "Child %d is DOWN", idx);
+static gf_boolean_t
+ec_set_up_state(ec_t *ec, uintptr_t index_mask, uintptr_t new_state)
+{
+ uintptr_t current_state = 0;
- ec->xl_up ^= 1ULL << idx;
- ec->xl_up_count--;
- }
+ if (xlator_is_cleanup_starting(ec->xl))
+ return _gf_false;
+
+ if ((ec->xl_notify & index_mask) == 0) {
+ ec->xl_notify |= index_mask;
+ ec->xl_notify_count++;
+ }
+ current_state = ec->xl_up & index_mask;
+ if (current_state != new_state) {
+ ec->xl_up ^= index_mask;
+ ec->xl_up_count += (current_state ? -1 : 1);
+
+ return _gf_true;
+ }
+
+ return _gf_false;
+}
+
+static gf_boolean_t
+ec_upcall(ec_t *ec, struct gf_upcall *upcall)
+{
+ struct gf_upcall_cache_invalidation *ci = NULL;
+ struct gf_upcall_inodelk_contention *lc = NULL;
+ inode_t *inode;
+ inode_table_t *table;
+
+ switch (upcall->event_type) {
+ case GF_UPCALL_CACHE_INVALIDATION:
+ ci = upcall->data;
+ ci->flags |= UP_INVAL_ATTR;
+ return _gf_true;
+
+ case GF_UPCALL_INODELK_CONTENTION:
+ lc = upcall->data;
+ if (strcmp(lc->domain, ec->xl->name) != 0) {
+ /* The lock is not owned by EC, ignore it. */
+ return _gf_true;
+ }
+ table = ((xlator_t *)ec->xl->graph->top)->itable;
+ if (table == NULL) {
+ /* Self-heal daemon doesn't have an inode table on the top
+ * xlator because it doesn't need it. In this case we should
+ * use the inode table managed by EC itself where all inodes
+ * being healed should be present. However self-heal doesn't
+ * use eager-locking and inodelk's are already released as
+ * soon as possible. In this case we can safely ignore these
+ * notifications. */
+ return _gf_false;
+ }
+ inode = inode_find(table, upcall->gfid);
+ /* If inode is not found, it means that it's already released,
+ * so we can ignore it. Probably it has been released and
+ * destroyed while the contention notification was being sent.
+ */
+ if (inode != NULL) {
+ ec_lock_release(ec, inode);
+ inode_unref(inode);
+ }
+
+ return _gf_false;
+
+ default:
+ return _gf_true;
+ }
}
int32_t
-ec_notify (xlator_t *this, int32_t event, void *data, void *data2)
-{
- ec_t *ec = this->private;
- int32_t idx = 0;
- int32_t error = 0;
- glusterfs_event_t old_event = GF_EVENT_MAXVAL;
- glusterfs_event_t new_event = GF_EVENT_MAXVAL;
- dict_t *input = NULL;
- dict_t *output = NULL;
-
- if (event == GF_EVENT_TRANSLATOR_OP) {
- if (!ec->up) {
- error = -1;
- goto out;
- } else {
- input = data;
- output = data2;
- error = ec_xl_op (this, input, output);
- }
- goto out;
- }
+ec_notify(xlator_t *this, int32_t event, void *data, void *data2)
+{
+ ec_t *ec = this->private;
+ int32_t idx = 0;
+ int32_t error = 0;
+ glusterfs_event_t old_event = GF_EVENT_MAXVAL;
+ dict_t *input = NULL;
+ dict_t *output = NULL;
+ gf_boolean_t propagate = _gf_true;
+ gf_boolean_t needs_shd_check = _gf_false;
+ int32_t orig_event = event;
+ uintptr_t mask = 0;
+
+ gf_msg_trace(this->name, 0, "NOTIFY(%d): %p, %p", event, data, data2);
+
+ if (event == GF_EVENT_UPCALL) {
+ propagate = ec_upcall(ec, data);
+ goto done;
+ }
- for (idx = 0; idx < ec->nodes; idx++) {
- if (ec->xl_list[idx] == data) {
- if (event == GF_EVENT_CHILD_UP)
- ec_selfheal_childup (ec, idx);
- break;
- }
+ if (event == GF_EVENT_TRANSLATOR_OP) {
+ if (!ec->up) {
+ error = -1;
+ } else {
+ input = data;
+ output = data2;
+ error = ec_xl_op(this, input, output);
}
+ goto out;
+ }
- LOCK (&ec->lock);
-
- if (event == GF_EVENT_PARENT_UP) {
- /*
- * Start a timer which sends appropriate event to parent
- * xlator to prevent the 'mount' syscall from hanging.
- */
- ec_launch_notify_timer (this, ec);
- goto unlock;
+ for (idx = 0; idx < ec->nodes; idx++) {
+ if (ec->xl_list[idx] == data) {
+ break;
}
+ }
- gf_log (this->name, GF_LOG_TRACE, "NOTIFY(%d): %p, %d",
- event, data, idx);
+ LOCK(&ec->lock);
- if (idx < ec->nodes) { /* CHILD_* events */
+ if (event == GF_EVENT_PARENT_UP) {
+ /*
+ * Start a timer which sends appropriate event to parent
+ * xlator to prevent the 'mount' syscall from hanging.
+ */
+ ec_launch_notify_timer(this, ec);
+ goto unlock;
+ } else if (event == GF_EVENT_PARENT_DOWN) {
+ /* If there aren't pending fops running after we have waken up
+ * them, we immediately propagate the notification. */
+ propagate = ec_disable_delays(ec);
+ ec_cleanup_healer_object(ec);
+ goto unlock;
+ }
- old_event = ec_get_event_from_state (ec);
+ if (idx < ec->nodes) { /* CHILD_* events */
+ old_event = ec_get_event_from_state(ec);
+
+ mask = 1ULL << idx;
+ if (event == GF_EVENT_CHILD_UP) {
+ /* We need to trigger a selfheal if a brick changes
+ * to UP state. */
+ if (ec_set_up_state(ec, mask, mask) && ec->shd.iamshd &&
+ !ec->shutdown) {
+ needs_shd_check = _gf_true;
+ }
+ } else if (event == GF_EVENT_CHILD_DOWN) {
+ ec_set_up_state(ec, mask, 0);
+ }
- if (event == GF_EVENT_CHILD_UP) {
- ec_handle_up (this, ec, idx);
- } else if (event == GF_EVENT_CHILD_DOWN) {
- ec_handle_down (this, ec, idx);
- }
+ event = ec_get_event_from_state(ec);
- new_event = ec_get_event_from_state (ec);
+ if (event == GF_EVENT_CHILD_UP) {
+ if (!ec->up) {
+ ec_up(this, ec);
+ }
+ } else {
+ /* If the volume is not UP, it's irrelevant if one
+ * brick has come up. We cannot heal anything. */
+ needs_shd_check = _gf_false;
- if (new_event == GF_EVENT_CHILD_UP && !ec->up) {
- ec_up (this, ec);
- } else if (new_event == GF_EVENT_CHILD_DOWN && ec->up) {
- ec_down (this, ec);
- }
+ if ((event == GF_EVENT_CHILD_DOWN) && ec->up) {
+ ec_down(this, ec);
+ }
+ }
- if ((new_event == old_event) && (new_event != GF_EVENT_MAXVAL))
- new_event = GF_EVENT_CHILD_MODIFIED;
+ if (event != GF_EVENT_MAXVAL) {
+ if (event == old_event) {
+ if (orig_event == GF_EVENT_CHILD_UP)
+ event = GF_EVENT_SOME_DESCENDENT_UP;
+ else /* orig_event has to be GF_EVENT_CHILD_DOWN */
+ event = GF_EVENT_SOME_DESCENDENT_DOWN;
+ }
+ } else {
+ propagate = _gf_false;
+ needs_shd_check = _gf_false;
+ }
- event = GF_EVENT_MAXVAL;/* Take care of notifying inside lock */
- if (new_event != GF_EVENT_MAXVAL)
- error = default_notify (this, new_event, data);
+ if (needs_shd_check) {
+ GF_ATOMIC_INC(ec->async_fop_count);
}
- unlock:
- UNLOCK (&ec->lock);
+ }
+unlock:
+ UNLOCK(&ec->lock);
+
+done:
+ if (needs_shd_check) {
+ ec_launch_replace_heal(ec);
+ }
+ if (propagate) {
+ error = default_notify(this, event, data);
+ }
- if (event != GF_EVENT_MAXVAL)
- return default_notify (this, event, data);
out:
- return error;
+ return error;
}
int32_t
-notify (xlator_t *this, int32_t event, void *data, ...)
+notify(xlator_t *this, int32_t event, void *data, ...)
{
- int ret = -1;
- va_list ap;
- void *data2 = NULL;
+ int ret = -1;
+ va_list ap;
+ void *data2 = NULL;
- va_start (ap, data);
- data2 = va_arg (ap, dict_t*);
- va_end (ap);
- ret = ec_notify (this, event, data, data2);
+ va_start(ap, data);
+ data2 = va_arg(ap, dict_t *);
+ va_end(ap);
+ ret = ec_notify(this, event, data, data2);
- return ret;
+ return ret;
+}
+
+static void
+ec_statistics_init(ec_t *ec)
+{
+ GF_ATOMIC_INIT(ec->stats.stripe_cache.hits, 0);
+ GF_ATOMIC_INIT(ec->stats.stripe_cache.misses, 0);
+ GF_ATOMIC_INIT(ec->stats.stripe_cache.updates, 0);
+ GF_ATOMIC_INIT(ec->stats.stripe_cache.invals, 0);
+ GF_ATOMIC_INIT(ec->stats.stripe_cache.evicts, 0);
+ GF_ATOMIC_INIT(ec->stats.stripe_cache.allocs, 0);
+ GF_ATOMIC_INIT(ec->stats.stripe_cache.errors, 0);
+ GF_ATOMIC_INIT(ec->stats.shd.attempted, 0);
+ GF_ATOMIC_INIT(ec->stats.shd.completed, 0);
+}
+
+static int
+ec_assign_read_mask(ec_t *ec, char *read_mask_str)
+{
+ char *mask = NULL;
+ char *maskptr = NULL;
+ char *saveptr = NULL;
+ char *id_str = NULL;
+ int id = 0;
+ int ret = 0;
+ uintptr_t read_mask = 0;
+
+ if (!read_mask_str) {
+ ec->read_mask = 0;
+ ret = 0;
+ goto out;
+ }
+
+ mask = gf_strdup(read_mask_str);
+ if (!mask) {
+ ret = -1;
+ goto out;
+ }
+ maskptr = mask;
+
+ for (;;) {
+ id_str = strtok_r(maskptr, ":", &saveptr);
+ if (id_str == NULL)
+ break;
+ if (gf_string2int(id_str, &id)) {
+ gf_msg(ec->xl->name, GF_LOG_ERROR, 0, EC_MSG_XLATOR_INIT_FAIL,
+ "In read-mask \"%s\" id %s is not a valid integer",
+ read_mask_str, id_str);
+ ret = -1;
+ goto out;
+ }
+
+ if ((id < 0) || (id >= ec->nodes)) {
+ gf_msg(ec->xl->name, GF_LOG_ERROR, 0, EC_MSG_XLATOR_INIT_FAIL,
+ "In read-mask \"%s\" id %d is not in range [0 - %d]",
+ read_mask_str, id, ec->nodes - 1);
+ ret = -1;
+ goto out;
+ }
+ read_mask |= (1UL << id);
+ maskptr = NULL;
+ }
+
+ if (gf_bits_count(read_mask) < ec->fragments) {
+ gf_msg(ec->xl->name, GF_LOG_ERROR, 0, EC_MSG_XLATOR_INIT_FAIL,
+ "read-mask \"%s\" should contain at least %d ids", read_mask_str,
+ ec->fragments);
+ ret = -1;
+ goto out;
+ }
+ ec->read_mask = read_mask;
+ ret = 0;
+out:
+ GF_FREE(mask);
+ return ret;
}
int32_t
-init (xlator_t *this)
+init(xlator_t *this)
{
ec_t *ec = NULL;
-
- if (this->parents == NULL)
- {
- gf_log(this->name, GF_LOG_WARNING, "Volume does not have parents.");
+ char *read_policy = NULL;
+ char *extensions = NULL;
+ int32_t err;
+ char *read_mask_str = NULL;
+
+ if (this->parents == NULL) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, EC_MSG_NO_PARENTS,
+ "Volume does not have parents.");
}
ec = GF_MALLOC(sizeof(*ec), ec_mt_ec_t);
- if (ec == NULL)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to allocate private memory.");
+ if (ec == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY,
+ "Failed to allocate private memory.");
return -1;
}
@@ -478,54 +803,105 @@ init (xlator_t *this)
ec->xl = this;
LOCK_INIT(&ec->lock);
+ GF_ATOMIC_INIT(ec->async_fop_count, 0);
+ INIT_LIST_HEAD(&ec->pending_fops);
+ INIT_LIST_HEAD(&ec->heal_waiting);
+ INIT_LIST_HEAD(&ec->healing);
+
ec->fop_pool = mem_pool_new(ec_fop_data_t, 1024);
ec->cbk_pool = mem_pool_new(ec_cbk_data_t, 4096);
ec->lock_pool = mem_pool_new(ec_lock_t, 1024);
if ((ec->fop_pool == NULL) || (ec->cbk_pool == NULL) ||
- (ec->lock_pool == NULL))
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to create memory pools.");
+ (ec->lock_pool == NULL)) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY,
+ "Failed to create memory pools.");
goto failed;
}
- if (ec_prepare_childs(this) != 0)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to initialize xlator");
+ if (ec_prepare_childs(this) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_XLATOR_INIT_FAIL,
+ "Failed to initialize xlator");
goto failed;
}
- if (ec_parse_options(this) != 0)
- {
- gf_log(this->name, GF_LOG_ERROR, "Failed to parse xlator options");
+ if (ec_parse_options(this) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, EC_MSG_XLATOR_PARSE_OPT_FAIL,
+ "Failed to parse xlator options");
+
+ goto failed;
+ }
+
+ GF_OPTION_INIT("cpu-extensions", extensions, str, failed);
+
+ err = ec_method_init(this, &ec->matrix, ec->fragments, ec->nodes,
+ ec->nodes * 2, extensions);
+ if (err != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, -err, EC_MSG_MATRIX_FAILED,
+ "Failed to initialize matrix management");
goto failed;
}
- ec_method_initialize();
- GF_OPTION_INIT ("self-heal-daemon", ec->shd.enabled, bool, failed);
- GF_OPTION_INIT ("iam-self-heal-daemon", ec->shd.iamshd, bool, failed);
+ GF_OPTION_INIT("self-heal-daemon", ec->shd.enabled, bool, failed);
+ GF_OPTION_INIT("iam-self-heal-daemon", ec->shd.iamshd, bool, failed);
+ GF_OPTION_INIT("eager-lock", ec->eager_lock, bool, failed);
+ GF_OPTION_INIT("other-eager-lock", ec->other_eager_lock, bool, failed);
+ GF_OPTION_INIT("eager-lock-timeout", ec->eager_lock_timeout, uint32,
+ failed);
+ GF_OPTION_INIT("other-eager-lock-timeout", ec->other_eager_lock_timeout,
+ uint32, failed);
+ GF_OPTION_INIT("background-heals", ec->background_heals, uint32, failed);
+ GF_OPTION_INIT("heal-wait-qlength", ec->heal_wait_qlen, uint32, failed);
+ GF_OPTION_INIT("self-heal-window-size", ec->self_heal_window_size, uint32,
+ failed);
+ ec_configure_background_heal_opts(ec, ec->background_heals,
+ ec->heal_wait_qlen);
+ GF_OPTION_INIT("read-policy", read_policy, str, failed);
+ if (ec_assign_read_policy(ec, read_policy))
+ goto failed;
+
+ GF_OPTION_INIT("heal-timeout", ec->shd.timeout, int32, failed);
+ GF_OPTION_INIT("shd-max-threads", ec->shd.max_threads, uint32, failed);
+ GF_OPTION_INIT("shd-wait-qlength", ec->shd.wait_qlength, uint32, failed);
+ GF_OPTION_INIT("optimistic-change-log", ec->optimistic_changelog, bool,
+ failed);
+ GF_OPTION_INIT("parallel-writes", ec->parallel_writes, bool, failed);
+ GF_OPTION_INIT("stripe-cache", ec->stripe_cache, uint32, failed);
+ GF_OPTION_INIT("quorum-count", ec->quorum_count, uint32, failed);
+ GF_OPTION_INIT("ec-read-mask", read_mask_str, str, failed);
+
+ if (ec_assign_read_mask(ec, read_mask_str))
+ goto failed;
+
+ this->itable = inode_table_new(EC_SHD_INODE_LRU_LIMIT, this);
+ if (!this->itable)
+ goto failed;
if (ec->shd.iamshd)
- ec_selfheal_daemon_init (this);
- gf_log(this->name, GF_LOG_DEBUG, "Disperse translator initialized.");
+ ec_selfheal_daemon_init(this);
+ gf_msg_debug(this->name, 0, "Disperse translator initialized.");
- ec->leaf_to_subvolid = dict_new ();
+ ec->leaf_to_subvolid = dict_new();
if (!ec->leaf_to_subvolid)
- goto failed;
- if (glusterfs_reachable_leaves (this, ec->leaf_to_subvolid)) {
- gf_log (this->name, GF_LOG_ERROR, "Failed to build subvol "
- "dictionary");
- goto failed;
+ goto failed;
+ if (glusterfs_reachable_leaves(this, ec->leaf_to_subvolid)) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_SUBVOL_BUILD_FAIL,
+ "Failed to build subvol "
+ "dictionary");
+ goto failed;
}
- if (ec_subvol_to_subvol_id_transform (ec, ec->leaf_to_subvolid) < 0) {
- gf_log (this->name, GF_LOG_ERROR, "Failed to build subvol-id "
- "dictionary");
- goto failed;
+ if (ec_subvol_to_subvol_id_transform(ec, ec->leaf_to_subvolid) < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_SUBVOL_ID_DICT_SET_FAIL,
+ "Failed to build subvol-id "
+ "dictionary");
+ goto failed;
}
+ ec_statistics_init(ec);
+
return 0;
failed:
@@ -534,13 +910,16 @@ failed:
return -1;
}
-void fini(xlator_t * this)
+void
+fini(xlator_t *this)
{
+ ec_selfheal_daemon_fini(this);
__ec_destroy_private(this);
}
-int32_t ec_gf_access(call_frame_t * frame, xlator_t * this, loc_t * loc,
- int32_t mask, dict_t * xdata)
+int32_t
+ec_gf_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
+ dict_t *xdata)
{
ec_access(frame, this, -1, EC_MINIMUM_ONE, default_access_cbk, NULL, loc,
mask, xdata);
@@ -548,9 +927,9 @@ int32_t ec_gf_access(call_frame_t * frame, xlator_t * this, loc_t * loc,
return 0;
}
-int32_t ec_gf_create(call_frame_t * frame, xlator_t * this, loc_t * loc,
- int32_t flags, mode_t mode, mode_t umask, fd_t * fd,
- dict_t * xdata)
+int32_t
+ec_gf_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
{
ec_create(frame, this, -1, EC_MINIMUM_MIN, default_create_cbk, NULL, loc,
flags, mode, umask, fd, xdata);
@@ -558,45 +937,58 @@ int32_t ec_gf_create(call_frame_t * frame, xlator_t * this, loc_t * loc,
return 0;
}
-int32_t ec_gf_discard(call_frame_t * frame, xlator_t * this, fd_t * fd,
- off_t offset, size_t len, dict_t * xdata)
+int32_t
+ec_gf_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
{
- default_discard_failure_cbk(frame, ENOTSUP);
+ ec_discard(frame, this, -1, EC_MINIMUM_MIN, default_discard_cbk, NULL, fd,
+ offset, len, xdata);
return 0;
}
-int32_t ec_gf_entrylk(call_frame_t * frame, xlator_t * this,
- const char * volume, loc_t * loc, const char * basename,
- entrylk_cmd cmd, entrylk_type type, dict_t * xdata)
+int32_t
+ec_gf_entrylk(call_frame_t *frame, xlator_t *this, const char *volume,
+ loc_t *loc, const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata)
{
- ec_entrylk(frame, this, -1, EC_MINIMUM_ALL, default_entrylk_cbk, NULL,
- volume, loc, basename, cmd, type, xdata);
+ uint32_t fop_flags = EC_MINIMUM_ALL;
+
+ if (cmd == ENTRYLK_UNLOCK)
+ fop_flags = EC_MINIMUM_ONE;
+ ec_entrylk(frame, this, -1, fop_flags, default_entrylk_cbk, NULL, volume,
+ loc, basename, cmd, type, xdata);
return 0;
}
-int32_t ec_gf_fentrylk(call_frame_t * frame, xlator_t * this,
- const char * volume, fd_t * fd, const char * basename,
- entrylk_cmd cmd, entrylk_type type, dict_t * xdata)
+int32_t
+ec_gf_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume,
+ fd_t *fd, const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata)
{
- ec_fentrylk(frame, this, -1, EC_MINIMUM_ALL, default_fentrylk_cbk, NULL,
- volume, fd, basename, cmd, type, xdata);
+ uint32_t fop_flags = EC_MINIMUM_ALL;
+
+ if (cmd == ENTRYLK_UNLOCK)
+ fop_flags = EC_MINIMUM_ONE;
+ ec_fentrylk(frame, this, -1, fop_flags, default_fentrylk_cbk, NULL, volume,
+ fd, basename, cmd, type, xdata);
return 0;
}
-int32_t ec_gf_fallocate(call_frame_t * frame, xlator_t * this, fd_t * fd,
- int32_t keep_size, off_t offset, size_t len,
- dict_t * xdata)
+int32_t
+ec_gf_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata)
{
- default_fallocate_failure_cbk(frame, ENOTSUP);
+ ec_fallocate(frame, this, -1, EC_MINIMUM_MIN, default_fallocate_cbk, NULL,
+ fd, mode, offset, len, xdata);
return 0;
}
-int32_t ec_gf_flush(call_frame_t * frame, xlator_t * this, fd_t * fd,
- dict_t * xdata)
+int32_t
+ec_gf_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
ec_flush(frame, this, -1, EC_MINIMUM_MIN, default_flush_cbk, NULL, fd,
xdata);
@@ -604,8 +996,9 @@ int32_t ec_gf_flush(call_frame_t * frame, xlator_t * this, fd_t * fd,
return 0;
}
-int32_t ec_gf_fsync(call_frame_t * frame, xlator_t * this, fd_t * fd,
- int32_t datasync, dict_t * xdata)
+int32_t
+ec_gf_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+ dict_t *xdata)
{
ec_fsync(frame, this, -1, EC_MINIMUM_MIN, default_fsync_cbk, NULL, fd,
datasync, xdata);
@@ -613,124 +1006,139 @@ int32_t ec_gf_fsync(call_frame_t * frame, xlator_t * this, fd_t * fd,
return 0;
}
-int32_t ec_gf_fsyncdir(call_frame_t * frame, xlator_t * this, fd_t * fd,
- int32_t datasync, dict_t * xdata)
+int32_t
+ec_gf_fsyncdir(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+ dict_t *xdata)
{
- ec_fsyncdir(frame, this, -1, EC_MINIMUM_MIN, default_fsyncdir_cbk, NULL,
- fd, datasync, xdata);
+ ec_fsyncdir(frame, this, -1, EC_MINIMUM_MIN, default_fsyncdir_cbk, NULL, fd,
+ datasync, xdata);
return 0;
}
int
-ec_marker_populate_args (call_frame_t *frame, int type, int *gauge,
- xlator_t **subvols)
+ec_marker_populate_args(call_frame_t *frame, int type, int *gauge,
+ xlator_t **subvols)
{
- xlator_t *this = frame->this;
- ec_t *ec = this->private;
+ xlator_t *this = frame->this;
+ ec_t *ec = this->private;
- memcpy (subvols, ec->xl_list, sizeof (*subvols) * ec->nodes);
+ memcpy(subvols, ec->xl_list, sizeof(*subvols) * ec->nodes);
- if (type == MARKER_XTIME_TYPE) {
- /*Don't error out on ENOENT/ENOTCONN */
- gauge[MCNT_NOTFOUND] = 0;
- gauge[MCNT_ENOTCONN] = 0;
- }
+ if (type == MARKER_XTIME_TYPE) {
+ /*Don't error out on ENOENT/ENOTCONN */
+ gauge[MCNT_NOTFOUND] = 0;
+ gauge[MCNT_ENOTCONN] = 0;
+ }
- return ec->nodes;
+ return ec->nodes;
}
int32_t
-ec_handle_heal_commands (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name, dict_t *xdata)
+ec_handle_heal_commands(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
{
- dict_t *dict_rsp = NULL;
- int op_ret = -1;
- int op_errno = ENOMEM;
+ dict_t *dict_rsp = NULL;
+ int op_ret = -1;
+ int op_errno = ENOMEM;
- if (!name || strcmp (name, GF_HEAL_INFO))
- return -1;
+ if (!name || strcmp(name, GF_HEAL_INFO))
+ return -1;
- dict_rsp = dict_new ();
- if (dict_rsp == NULL)
- goto out;
+ op_errno = -ec_get_heal_info(this, loc, &dict_rsp);
+ if (op_errno <= 0) {
+ op_errno = op_ret = 0;
+ }
- if (dict_set_str (dict_rsp, "heal-info", "heal") == 0)
- op_ret = 0;
-out:
- STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict_rsp, NULL);
- if (dict_rsp)
- dict_unref (dict_rsp);
- return 0;
+ STACK_UNWIND_STRICT(getxattr, frame, op_ret, op_errno, dict_rsp, NULL);
+ if (dict_rsp)
+ dict_unref(dict_rsp);
+ return 0;
}
int32_t
-ec_gf_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name, dict_t *xdata)
+ec_gf_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
{
- int error = 0;
- ec_t *ec = this->private;
+ int error = 0;
+ ec_t *ec = this->private;
+ int32_t fop_flags = EC_MINIMUM_ONE;
- if (name && strcmp (name, EC_XATTR_HEAL) != 0) {
- EC_INTERNAL_XATTR_OR_GOTO(name, NULL, error, out);
- }
+ if (name && strcmp(name, EC_XATTR_HEAL) != 0) {
+ EC_INTERNAL_XATTR_OR_GOTO(name, NULL, error, out);
+ }
- if (ec_handle_heal_commands (frame, this, loc, name, xdata) == 0)
- return 0;
+ if (ec_handle_heal_commands(frame, this, loc, name, xdata) == 0)
+ return 0;
- if (cluster_handle_marker_getxattr (frame, loc, name, ec->vol_uuid,
- NULL, ec_marker_populate_args) == 0)
- return 0;
+ if (cluster_handle_marker_getxattr(frame, loc, name, ec->vol_uuid, NULL,
+ ec_marker_populate_args) == 0)
+ return 0;
- ec_getxattr (frame, this, -1, EC_MINIMUM_MIN, default_getxattr_cbk,
- NULL, loc, name, xdata);
+ if (name && ((fnmatch(GF_XATTR_STIME_PATTERN, name, 0) == 0) ||
+ XATTR_IS_NODE_UUID(name) || XATTR_IS_NODE_UUID_LIST(name))) {
+ fop_flags = EC_MINIMUM_ALL;
+ }
- return 0;
+ ec_getxattr(frame, this, -1, fop_flags, default_getxattr_cbk, NULL, loc,
+ name, xdata);
+
+ return 0;
out:
- error = ENODATA;
- STACK_UNWIND_STRICT (getxattr, frame, -1, error, NULL, NULL);
- return 0;
+ error = ENODATA;
+ STACK_UNWIND_STRICT(getxattr, frame, -1, error, NULL, NULL);
+ return 0;
}
int32_t
-ec_gf_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- const char *name, dict_t *xdata)
+ec_gf_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
+ dict_t *xdata)
{
- int error = 0;
+ int error = 0;
- EC_INTERNAL_XATTR_OR_GOTO(name, NULL, error, out);
+ EC_INTERNAL_XATTR_OR_GOTO(name, NULL, error, out);
- ec_fgetxattr (frame, this, -1, EC_MINIMUM_MIN, default_fgetxattr_cbk,
- NULL, fd, name, xdata);
- return 0;
+ ec_fgetxattr(frame, this, -1, EC_MINIMUM_ONE, default_fgetxattr_cbk, NULL,
+ fd, name, xdata);
+ return 0;
out:
- error = ENODATA;
- STACK_UNWIND_STRICT (fgetxattr, frame, -1, error, NULL, NULL);
- return 0;
+ error = ENODATA;
+ STACK_UNWIND_STRICT(fgetxattr, frame, -1, error, NULL, NULL);
+ return 0;
}
-int32_t ec_gf_inodelk(call_frame_t * frame, xlator_t * this,
- const char * volume, loc_t * loc, int32_t cmd,
- struct gf_flock * flock, dict_t * xdata)
+int32_t
+ec_gf_inodelk(call_frame_t *frame, xlator_t *this, const char *volume,
+ loc_t *loc, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
{
- ec_inodelk(frame, this, -1, EC_MINIMUM_ALL, default_inodelk_cbk, NULL,
- volume, loc, cmd, flock, xdata);
+ int32_t fop_flags = EC_MINIMUM_ALL;
+
+ if (flock->l_type == F_UNLCK)
+ fop_flags = EC_MINIMUM_ONE;
+
+ ec_inodelk(frame, this, &frame->root->lk_owner, -1, fop_flags,
+ default_inodelk_cbk, NULL, volume, loc, cmd, flock, xdata);
return 0;
}
-int32_t ec_gf_finodelk(call_frame_t * frame, xlator_t * this,
- const char * volume, fd_t * fd, int32_t cmd,
- struct gf_flock * flock, dict_t * xdata)
+int32_t
+ec_gf_finodelk(call_frame_t *frame, xlator_t *this, const char *volume,
+ fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
{
- ec_finodelk(frame, this, -1, EC_MINIMUM_ALL, default_finodelk_cbk, NULL,
- volume, fd, cmd, flock, xdata);
+ int32_t fop_flags = EC_MINIMUM_ALL;
+
+ if (flock->l_type == F_UNLCK)
+ fop_flags = EC_MINIMUM_ONE;
+ ec_finodelk(frame, this, &frame->root->lk_owner, -1, fop_flags,
+ default_finodelk_cbk, NULL, volume, fd, cmd, flock, xdata);
return 0;
}
-int32_t ec_gf_link(call_frame_t * frame, xlator_t * this, loc_t * oldloc,
- loc_t * newloc, dict_t * xdata)
+int32_t
+ec_gf_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
{
ec_link(frame, this, -1, EC_MINIMUM_MIN, default_link_cbk, NULL, oldloc,
newloc, xdata);
@@ -738,17 +1146,22 @@ int32_t ec_gf_link(call_frame_t * frame, xlator_t * this, loc_t * oldloc,
return 0;
}
-int32_t ec_gf_lk(call_frame_t * frame, xlator_t * this, fd_t * fd,
- int32_t cmd, struct gf_flock * flock, dict_t * xdata)
+int32_t
+ec_gf_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata)
{
- ec_lk(frame, this, -1, EC_MINIMUM_ALL, default_lk_cbk, NULL, fd, cmd,
- flock, xdata);
+ int32_t fop_flags = EC_MINIMUM_ALL;
+
+ if (flock->l_type == F_UNLCK)
+ fop_flags = EC_MINIMUM_ONE;
+ ec_lk(frame, this, -1, fop_flags, default_lk_cbk, NULL, fd, cmd, flock,
+ xdata);
return 0;
}
-int32_t ec_gf_lookup(call_frame_t * frame, xlator_t * this, loc_t * loc,
- dict_t * xdata)
+int32_t
+ec_gf_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
ec_lookup(frame, this, -1, EC_MINIMUM_MIN, default_lookup_cbk, NULL, loc,
xdata);
@@ -756,8 +1169,9 @@ int32_t ec_gf_lookup(call_frame_t * frame, xlator_t * this, loc_t * loc,
return 0;
}
-int32_t ec_gf_mkdir(call_frame_t * frame, xlator_t * this, loc_t * loc,
- mode_t mode, mode_t umask, dict_t * xdata)
+int32_t
+ec_gf_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
{
ec_mkdir(frame, this, -1, EC_MINIMUM_MIN, default_mkdir_cbk, NULL, loc,
mode, umask, xdata);
@@ -765,8 +1179,9 @@ int32_t ec_gf_mkdir(call_frame_t * frame, xlator_t * this, loc_t * loc,
return 0;
}
-int32_t ec_gf_mknod(call_frame_t * frame, xlator_t * this, loc_t * loc,
- mode_t mode, dev_t rdev, mode_t umask, dict_t * xdata)
+int32_t
+ec_gf_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *xdata)
{
ec_mknod(frame, this, -1, EC_MINIMUM_MIN, default_mknod_cbk, NULL, loc,
mode, rdev, umask, xdata);
@@ -774,17 +1189,19 @@ int32_t ec_gf_mknod(call_frame_t * frame, xlator_t * this, loc_t * loc,
return 0;
}
-int32_t ec_gf_open(call_frame_t * frame, xlator_t * this, loc_t * loc,
- int32_t flags, fd_t * fd, dict_t * xdata)
+int32_t
+ec_gf_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata)
{
- ec_open(frame, this, -1, EC_MINIMUM_MIN, default_open_cbk, NULL, loc,
- flags, fd, xdata);
+ ec_open(frame, this, -1, EC_MINIMUM_MIN, default_open_cbk, NULL, loc, flags,
+ fd, xdata);
return 0;
}
-int32_t ec_gf_opendir(call_frame_t * frame, xlator_t * this, loc_t * loc,
- fd_t * fd, dict_t * xdata)
+int32_t
+ec_gf_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
{
ec_opendir(frame, this, -1, EC_MINIMUM_MIN, default_opendir_cbk, NULL, loc,
fd, xdata);
@@ -792,8 +1209,9 @@ int32_t ec_gf_opendir(call_frame_t * frame, xlator_t * this, loc_t * loc,
return 0;
}
-int32_t ec_gf_readdir(call_frame_t * frame, xlator_t * this, fd_t * fd,
- size_t size, off_t offset, dict_t * xdata)
+int32_t
+ec_gf_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *xdata)
{
ec_readdir(frame, this, -1, EC_MINIMUM_ONE, default_readdir_cbk, NULL, fd,
size, offset, xdata);
@@ -801,17 +1219,19 @@ int32_t ec_gf_readdir(call_frame_t * frame, xlator_t * this, fd_t * fd,
return 0;
}
-int32_t ec_gf_readdirp(call_frame_t * frame, xlator_t * this, fd_t * fd,
- size_t size, off_t offset, dict_t * xdata)
+int32_t
+ec_gf_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *xdata)
{
- ec_readdirp(frame, this, -1, EC_MINIMUM_ONE, default_readdirp_cbk, NULL,
- fd, size, offset, xdata);
+ ec_readdirp(frame, this, -1, EC_MINIMUM_ONE, default_readdirp_cbk, NULL, fd,
+ size, offset, xdata);
return 0;
}
-int32_t ec_gf_readlink(call_frame_t * frame, xlator_t * this, loc_t * loc,
- size_t size, dict_t * xdata)
+int32_t
+ec_gf_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+ dict_t *xdata)
{
ec_readlink(frame, this, -1, EC_MINIMUM_ONE, default_readlink_cbk, NULL,
loc, size, xdata);
@@ -819,60 +1239,63 @@ int32_t ec_gf_readlink(call_frame_t * frame, xlator_t * this, loc_t * loc,
return 0;
}
-int32_t ec_gf_readv(call_frame_t * frame, xlator_t * this, fd_t * fd,
- size_t size, off_t offset, uint32_t flags, dict_t * xdata)
+int32_t
+ec_gf_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
{
- ec_readv(frame, this, -1, EC_MINIMUM_MIN, default_readv_cbk, NULL, fd,
- size, offset, flags, xdata);
+ ec_readv(frame, this, -1, EC_MINIMUM_MIN, default_readv_cbk, NULL, fd, size,
+ offset, flags, xdata);
return 0;
}
int32_t
-ec_gf_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name, dict_t *xdata)
+ec_gf_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
{
- int error = 0;
+ int error = 0;
- EC_INTERNAL_XATTR_OR_GOTO (name, xdata, error, out);
+ EC_INTERNAL_XATTR_OR_GOTO(name, xdata, error, out);
- ec_removexattr (frame, this, -1, EC_MINIMUM_MIN,
- default_removexattr_cbk, NULL, loc, name, xdata);
+ ec_removexattr(frame, this, -1, EC_MINIMUM_MIN, default_removexattr_cbk,
+ NULL, loc, name, xdata);
- return 0;
+ return 0;
out:
- STACK_UNWIND_STRICT (removexattr, frame, -1, error, NULL);
- return 0;
+ STACK_UNWIND_STRICT(removexattr, frame, -1, error, NULL);
+ return 0;
}
int32_t
-ec_gf_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- const char *name, dict_t *xdata)
+ec_gf_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
{
- int error = 0;
+ int error = 0;
- EC_INTERNAL_XATTR_OR_GOTO (name, xdata, error, out);
+ EC_INTERNAL_XATTR_OR_GOTO(name, xdata, error, out);
- ec_fremovexattr (frame, this, -1, EC_MINIMUM_MIN,
- default_fremovexattr_cbk, NULL, fd, name, xdata);
+ ec_fremovexattr(frame, this, -1, EC_MINIMUM_MIN, default_fremovexattr_cbk,
+ NULL, fd, name, xdata);
- return 0;
+ return 0;
out:
- STACK_UNWIND_STRICT (fremovexattr, frame, -1, error, NULL);
- return 0;
+ STACK_UNWIND_STRICT(fremovexattr, frame, -1, error, NULL);
+ return 0;
}
-int32_t ec_gf_rename(call_frame_t * frame, xlator_t * this, loc_t * oldloc,
- loc_t * newloc, dict_t * xdata)
+int32_t
+ec_gf_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
{
- ec_rename(frame, this, -1, EC_MINIMUM_MIN, default_rename_cbk, NULL,
- oldloc, newloc, xdata);
+ ec_rename(frame, this, -1, EC_MINIMUM_MIN, default_rename_cbk, NULL, oldloc,
+ newloc, xdata);
return 0;
}
-int32_t ec_gf_rmdir(call_frame_t * frame, xlator_t * this, loc_t * loc,
- int xflags, dict_t * xdata)
+int32_t
+ec_gf_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags,
+ dict_t *xdata)
{
ec_rmdir(frame, this, -1, EC_MINIMUM_MIN, default_rmdir_cbk, NULL, loc,
xflags, xdata);
@@ -880,8 +1303,9 @@ int32_t ec_gf_rmdir(call_frame_t * frame, xlator_t * this, loc_t * loc,
return 0;
}
-int32_t ec_gf_setattr(call_frame_t * frame, xlator_t * this, loc_t * loc,
- struct iatt * stbuf, int32_t valid, dict_t * xdata)
+int32_t
+ec_gf_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
ec_setattr(frame, this, -1, EC_MINIMUM_MIN, default_setattr_cbk, NULL, loc,
stbuf, valid, xdata);
@@ -889,51 +1313,52 @@ int32_t ec_gf_setattr(call_frame_t * frame, xlator_t * this, loc_t * loc,
return 0;
}
-int32_t ec_gf_fsetattr(call_frame_t * frame, xlator_t * this, fd_t * fd,
- struct iatt * stbuf, int32_t valid, dict_t * xdata)
+int32_t
+ec_gf_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- ec_fsetattr(frame, this, -1, EC_MINIMUM_MIN, default_fsetattr_cbk, NULL,
- fd, stbuf, valid, xdata);
+ ec_fsetattr(frame, this, -1, EC_MINIMUM_MIN, default_fsetattr_cbk, NULL, fd,
+ stbuf, valid, xdata);
return 0;
}
int32_t
-ec_gf_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- dict_t *dict, int32_t flags, dict_t *xdata)
+ec_gf_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata)
{
- int error = 0;
+ int error = 0;
- EC_INTERNAL_XATTR_OR_GOTO ("", dict, error, out);
+ EC_INTERNAL_XATTR_OR_GOTO("", dict, error, out);
- ec_setxattr (frame, this, -1, EC_MINIMUM_MIN, default_setxattr_cbk,
- NULL, loc, dict, flags, xdata);
+ ec_setxattr(frame, this, -1, EC_MINIMUM_MIN, default_setxattr_cbk, NULL,
+ loc, dict, flags, xdata);
- return 0;
+ return 0;
out:
- STACK_UNWIND_STRICT (setxattr, frame, -1, error, NULL);
- return 0;
+ STACK_UNWIND_STRICT(setxattr, frame, -1, error, NULL);
+ return 0;
}
int32_t
-ec_gf_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- dict_t *dict, int32_t flags, dict_t *xdata)
+ec_gf_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata)
{
- int error = 0;
+ int error = 0;
- EC_INTERNAL_XATTR_OR_GOTO ("", dict, error, out);
+ EC_INTERNAL_XATTR_OR_GOTO("", dict, error, out);
- ec_fsetxattr (frame, this, -1, EC_MINIMUM_MIN, default_fsetxattr_cbk,
- NULL, fd, dict, flags, xdata);
+ ec_fsetxattr(frame, this, -1, EC_MINIMUM_MIN, default_fsetxattr_cbk, NULL,
+ fd, dict, flags, xdata);
- return 0;
+ return 0;
out:
- STACK_UNWIND_STRICT (fsetxattr, frame, -1, error, NULL);
- return 0;
+ STACK_UNWIND_STRICT(fsetxattr, frame, -1, error, NULL);
+ return 0;
}
-int32_t ec_gf_stat(call_frame_t * frame, xlator_t * this, loc_t * loc,
- dict_t * xdata)
+int32_t
+ec_gf_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
ec_stat(frame, this, -1, EC_MINIMUM_MIN, default_stat_cbk, NULL, loc,
xdata);
@@ -941,8 +1366,8 @@ int32_t ec_gf_stat(call_frame_t * frame, xlator_t * this, loc_t * loc,
return 0;
}
-int32_t ec_gf_fstat(call_frame_t * frame, xlator_t * this, fd_t * fd,
- dict_t * xdata)
+int32_t
+ec_gf_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
ec_fstat(frame, this, -1, EC_MINIMUM_MIN, default_fstat_cbk, NULL, fd,
xdata);
@@ -950,8 +1375,8 @@ int32_t ec_gf_fstat(call_frame_t * frame, xlator_t * this, fd_t * fd,
return 0;
}
-int32_t ec_gf_statfs(call_frame_t * frame, xlator_t * this, loc_t * loc,
- dict_t * xdata)
+int32_t
+ec_gf_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
ec_statfs(frame, this, -1, EC_MINIMUM_MIN, default_statfs_cbk, NULL, loc,
xdata);
@@ -959,9 +1384,9 @@ int32_t ec_gf_statfs(call_frame_t * frame, xlator_t * this, loc_t * loc,
return 0;
}
-int32_t ec_gf_symlink(call_frame_t * frame, xlator_t * this,
- const char * linkname, loc_t * loc, mode_t umask,
- dict_t * xdata)
+int32_t
+ec_gf_symlink(call_frame_t *frame, xlator_t *this, const char *linkname,
+ loc_t *loc, mode_t umask, dict_t *xdata)
{
ec_symlink(frame, this, -1, EC_MINIMUM_MIN, default_symlink_cbk, NULL,
linkname, loc, umask, xdata);
@@ -969,8 +1394,9 @@ int32_t ec_gf_symlink(call_frame_t * frame, xlator_t * this,
return 0;
}
-int32_t ec_gf_truncate(call_frame_t * frame, xlator_t * this, loc_t * loc,
- off_t offset, dict_t * xdata)
+int32_t
+ec_gf_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
{
ec_truncate(frame, this, -1, EC_MINIMUM_MIN, default_truncate_cbk, NULL,
loc, offset, xdata);
@@ -978,8 +1404,9 @@ int32_t ec_gf_truncate(call_frame_t * frame, xlator_t * this, loc_t * loc,
return 0;
}
-int32_t ec_gf_ftruncate(call_frame_t * frame, xlator_t * this, fd_t * fd,
- off_t offset, dict_t * xdata)
+int32_t
+ec_gf_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
{
ec_ftruncate(frame, this, -1, EC_MINIMUM_MIN, default_ftruncate_cbk, NULL,
fd, offset, xdata);
@@ -987,8 +1414,9 @@ int32_t ec_gf_ftruncate(call_frame_t * frame, xlator_t * this, fd_t * fd,
return 0;
}
-int32_t ec_gf_unlink(call_frame_t * frame, xlator_t * this, loc_t * loc,
- int xflags, dict_t * xdata)
+int32_t
+ec_gf_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags,
+ dict_t *xdata)
{
ec_unlink(frame, this, -1, EC_MINIMUM_MIN, default_unlink_cbk, NULL, loc,
xflags, xdata);
@@ -996,9 +1424,10 @@ int32_t ec_gf_unlink(call_frame_t * frame, xlator_t * this, loc_t * loc,
return 0;
}
-int32_t ec_gf_writev(call_frame_t * frame, xlator_t * this, fd_t * fd,
- struct iovec * vector, int32_t count, off_t offset,
- uint32_t flags, struct iobref * iobref, dict_t * xdata)
+int32_t
+ec_gf_writev(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t offset, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata)
{
ec_writev(frame, this, -1, EC_MINIMUM_MIN, default_writev_cbk, NULL, fd,
vector, count, offset, flags, iobref, xdata);
@@ -1006,9 +1435,9 @@ int32_t ec_gf_writev(call_frame_t * frame, xlator_t * this, fd_t * fd,
return 0;
}
-int32_t ec_gf_xattrop(call_frame_t * frame, xlator_t * this, loc_t * loc,
- gf_xattrop_flags_t optype, dict_t * xattr,
- dict_t * xdata)
+int32_t
+ec_gf_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
{
ec_xattrop(frame, this, -1, EC_MINIMUM_MIN, default_xattrop_cbk, NULL, loc,
optype, xattr, xdata);
@@ -1016,70 +1445,94 @@ int32_t ec_gf_xattrop(call_frame_t * frame, xlator_t * this, loc_t * loc,
return 0;
}
-int32_t ec_gf_fxattrop(call_frame_t * frame, xlator_t * this, fd_t * fd,
- gf_xattrop_flags_t optype, dict_t * xattr,
- dict_t * xdata)
+int32_t
+ec_gf_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
{
- ec_fxattrop(frame, this, -1, EC_MINIMUM_MIN, default_fxattrop_cbk, NULL,
- fd, optype, xattr, xdata);
+ ec_fxattrop(frame, this, -1, EC_MINIMUM_MIN, default_fxattrop_cbk, NULL, fd,
+ optype, xattr, xdata);
return 0;
}
-int32_t ec_gf_zerofill(call_frame_t * frame, xlator_t * this, fd_t * fd,
- off_t offset, off_t len, dict_t * xdata)
+int32_t
+ec_gf_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata)
{
default_zerofill_failure_cbk(frame, ENOTSUP);
return 0;
}
-int32_t ec_gf_forget(xlator_t * this, inode_t * inode)
+int32_t
+ec_gf_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ gf_seek_what_t what, dict_t *xdata)
+{
+ ec_seek(frame, this, -1, EC_MINIMUM_ONE, default_seek_cbk, NULL, fd, offset,
+ what, xdata);
+
+ return 0;
+}
+
+int32_t
+ec_gf_ipc(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata)
+{
+ ec_ipc(frame, this, -1, EC_MINIMUM_MIN, default_ipc_cbk, NULL, op, xdata);
+ return 0;
+}
+
+int32_t
+ec_gf_forget(xlator_t *this, inode_t *inode)
{
uint64_t value = 0;
- ec_inode_t * ctx = NULL;
+ ec_inode_t *ctx = NULL;
- if ((inode_ctx_del(inode, this, &value) == 0) && (value != 0))
- {
+ if ((inode_ctx_del(inode, this, &value) == 0) && (value != 0)) {
ctx = (ec_inode_t *)(uintptr_t)value;
+ /* We can only forget an inode if it has been unlocked, so the stripe
+ * cache should also be empty. */
+ GF_ASSERT(list_empty(&ctx->stripe_cache.lru));
GF_FREE(ctx);
}
return 0;
}
-void ec_gf_release_fd(xlator_t * this, fd_t * fd)
+void
+ec_gf_release_fd(xlator_t *this, fd_t *fd)
{
uint64_t value = 0;
- ec_fd_t * ctx = NULL;
+ ec_fd_t *ctx = NULL;
- if ((fd_ctx_del(fd, this, &value) == 0) && (value != 0))
- {
+ if ((fd_ctx_del(fd, this, &value) == 0) && (value != 0)) {
ctx = (ec_fd_t *)(uintptr_t)value;
loc_wipe(&ctx->loc);
GF_FREE(ctx);
}
}
-int32_t ec_gf_release(xlator_t * this, fd_t * fd)
+int32_t
+ec_gf_release(xlator_t *this, fd_t *fd)
{
ec_gf_release_fd(this, fd);
return 0;
}
-int32_t ec_gf_releasedir(xlator_t * this, fd_t * fd)
+int32_t
+ec_gf_releasedir(xlator_t *this, fd_t *fd)
{
ec_gf_release_fd(this, fd);
return 0;
}
-int32_t ec_dump_private(xlator_t *this)
+int32_t
+ec_dump_private(xlator_t *this)
{
ec_t *ec = NULL;
- char key_prefix[GF_DUMP_MAX_BUF_LEN];
- char tmp[65];
+ char key_prefix[GF_DUMP_MAX_BUF_LEN];
+ char tmp[65];
GF_ASSERT(this);
@@ -1087,7 +1540,8 @@ int32_t ec_dump_private(xlator_t *this)
GF_ASSERT(ec);
snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name);
- gf_proc_dump_add_section(key_prefix);
+ gf_proc_dump_add_section("%s", key_prefix);
+ gf_proc_dump_write("up", "%u", ec->up);
gf_proc_dump_write("nodes", "%u", ec->nodes);
gf_proc_dump_write("redundancy", "%u", ec->redundancy);
gf_proc_dump_write("fragment_size", "%u", ec->fragment_size);
@@ -1095,88 +1549,325 @@ int32_t ec_dump_private(xlator_t *this)
gf_proc_dump_write("childs_up", "%u", ec->xl_up_count);
gf_proc_dump_write("childs_up_mask", "%s",
ec_bin(tmp, sizeof(tmp), ec->xl_up, ec->nodes));
+ if (ec->read_mask) {
+ gf_proc_dump_write("read-mask", "%s",
+ ec_bin(tmp, sizeof(tmp), ec->read_mask, ec->nodes));
+ }
+ gf_proc_dump_write("background-heals", "%d", ec->background_heals);
+ gf_proc_dump_write("heal-wait-qlength", "%d", ec->heal_wait_qlen);
+ gf_proc_dump_write("self-heal-window-size", "%" PRIu32,
+ ec->self_heal_window_size);
+ gf_proc_dump_write("healers", "%d", ec->healers);
+ gf_proc_dump_write("heal-waiters", "%d", ec->heal_waiters);
+ gf_proc_dump_write("read-policy", "%s", ec_read_policies[ec->read_policy]);
+ gf_proc_dump_write("parallel-writes", "%d", ec->parallel_writes);
+ gf_proc_dump_write("quorum-count", "%u", ec->quorum_count);
+
+ snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s.stats.stripe_cache",
+ this->type, this->name);
+ gf_proc_dump_add_section("%s", key_prefix);
+
+ gf_proc_dump_write("hits", "%" GF_PRI_ATOMIC,
+ GF_ATOMIC_GET(ec->stats.stripe_cache.hits));
+ gf_proc_dump_write("misses", "%" GF_PRI_ATOMIC,
+ GF_ATOMIC_GET(ec->stats.stripe_cache.misses));
+ gf_proc_dump_write("updates", "%" GF_PRI_ATOMIC,
+ GF_ATOMIC_GET(ec->stats.stripe_cache.updates));
+ gf_proc_dump_write("invalidations", "%" GF_PRI_ATOMIC,
+ GF_ATOMIC_GET(ec->stats.stripe_cache.invals));
+ gf_proc_dump_write("evicts", "%" GF_PRI_ATOMIC,
+ GF_ATOMIC_GET(ec->stats.stripe_cache.evicts));
+ gf_proc_dump_write("allocations", "%" GF_PRI_ATOMIC,
+ GF_ATOMIC_GET(ec->stats.stripe_cache.allocs));
+ gf_proc_dump_write("errors", "%" GF_PRI_ATOMIC,
+ GF_ATOMIC_GET(ec->stats.stripe_cache.errors));
+ gf_proc_dump_write("heals-attempted", "%" GF_PRI_ATOMIC,
+ GF_ATOMIC_GET(ec->stats.shd.attempted));
+ gf_proc_dump_write("heals-completed", "%" GF_PRI_ATOMIC,
+ GF_ATOMIC_GET(ec->stats.shd.completed));
return 0;
}
-struct xlator_fops fops =
-{
- .lookup = ec_gf_lookup,
- .stat = ec_gf_stat,
- .fstat = ec_gf_fstat,
- .truncate = ec_gf_truncate,
- .ftruncate = ec_gf_ftruncate,
- .access = ec_gf_access,
- .readlink = ec_gf_readlink,
- .mknod = ec_gf_mknod,
- .mkdir = ec_gf_mkdir,
- .unlink = ec_gf_unlink,
- .rmdir = ec_gf_rmdir,
- .symlink = ec_gf_symlink,
- .rename = ec_gf_rename,
- .link = ec_gf_link,
- .create = ec_gf_create,
- .open = ec_gf_open,
- .readv = ec_gf_readv,
- .writev = ec_gf_writev,
- .flush = ec_gf_flush,
- .fsync = ec_gf_fsync,
- .opendir = ec_gf_opendir,
- .readdir = ec_gf_readdir,
- .readdirp = ec_gf_readdirp,
- .fsyncdir = ec_gf_fsyncdir,
- .statfs = ec_gf_statfs,
- .setxattr = ec_gf_setxattr,
- .getxattr = ec_gf_getxattr,
- .fsetxattr = ec_gf_fsetxattr,
- .fgetxattr = ec_gf_fgetxattr,
- .removexattr = ec_gf_removexattr,
- .fremovexattr = ec_gf_fremovexattr,
- .lk = ec_gf_lk,
- .inodelk = ec_gf_inodelk,
- .finodelk = ec_gf_finodelk,
- .entrylk = ec_gf_entrylk,
- .fentrylk = ec_gf_fentrylk,
- .xattrop = ec_gf_xattrop,
- .fxattrop = ec_gf_fxattrop,
- .setattr = ec_gf_setattr,
- .fsetattr = ec_gf_fsetattr,
- .fallocate = ec_gf_fallocate,
- .discard = ec_gf_discard,
- .zerofill = ec_gf_zerofill
-};
-
-struct xlator_cbks cbks =
-{
- .forget = ec_gf_forget,
- .release = ec_gf_release,
- .releasedir = ec_gf_releasedir
-};
-
-struct xlator_dumpops dumpops = {
- .priv = ec_dump_private
-};
-
-struct volume_options options[] =
-{
+struct xlator_fops fops = {.lookup = ec_gf_lookup,
+ .stat = ec_gf_stat,
+ .fstat = ec_gf_fstat,
+ .truncate = ec_gf_truncate,
+ .ftruncate = ec_gf_ftruncate,
+ .access = ec_gf_access,
+ .readlink = ec_gf_readlink,
+ .mknod = ec_gf_mknod,
+ .mkdir = ec_gf_mkdir,
+ .unlink = ec_gf_unlink,
+ .rmdir = ec_gf_rmdir,
+ .symlink = ec_gf_symlink,
+ .rename = ec_gf_rename,
+ .link = ec_gf_link,
+ .create = ec_gf_create,
+ .open = ec_gf_open,
+ .readv = ec_gf_readv,
+ .writev = ec_gf_writev,
+ .flush = ec_gf_flush,
+ .fsync = ec_gf_fsync,
+ .opendir = ec_gf_opendir,
+ .readdir = ec_gf_readdir,
+ .readdirp = ec_gf_readdirp,
+ .fsyncdir = ec_gf_fsyncdir,
+ .statfs = ec_gf_statfs,
+ .setxattr = ec_gf_setxattr,
+ .getxattr = ec_gf_getxattr,
+ .fsetxattr = ec_gf_fsetxattr,
+ .fgetxattr = ec_gf_fgetxattr,
+ .removexattr = ec_gf_removexattr,
+ .fremovexattr = ec_gf_fremovexattr,
+ .lk = ec_gf_lk,
+ .inodelk = ec_gf_inodelk,
+ .finodelk = ec_gf_finodelk,
+ .entrylk = ec_gf_entrylk,
+ .fentrylk = ec_gf_fentrylk,
+ .xattrop = ec_gf_xattrop,
+ .fxattrop = ec_gf_fxattrop,
+ .setattr = ec_gf_setattr,
+ .fsetattr = ec_gf_fsetattr,
+ .fallocate = ec_gf_fallocate,
+ .discard = ec_gf_discard,
+ .zerofill = ec_gf_zerofill,
+ .seek = ec_gf_seek,
+ .ipc = ec_gf_ipc};
+
+struct xlator_cbks cbks = {.forget = ec_gf_forget,
+ .release = ec_gf_release,
+ .releasedir = ec_gf_releasedir};
+
+struct xlator_dumpops dumpops = {.priv = ec_dump_private};
+
+struct volume_options options[] = {
+ {.key = {"redundancy"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "{{ volume.redundancy }}",
+ .description = "Maximum number of bricks that can fail "
+ "simultaneously without losing data."},
{
- .key = { "redundancy" },
- .type = GF_OPTION_TYPE_INT,
- .description = "Maximum number of bricks that can fail "
- "simultaneously without losing data."
- },
- {
- .key = { "self-heal-daemon" },
+ .key = {"self-heal-daemon"},
.type = GF_OPTION_TYPE_BOOL,
.description = "self-heal daemon enable/disable",
.default_value = "enable",
+ .op_version = {GD_OP_VERSION_3_7_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"disperse"},
},
- { .key = {"iam-self-heal-daemon"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
- .description = "This option differentiates if the disperse "
- "translator is running as part of self-heal-daemon "
- "or not."
+ {.key = {"iam-self-heal-daemon"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "This option differentiates if the disperse "
+ "translator is running as part of self-heal-daemon "
+ "or not."},
+ {.key = {"eager-lock"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .op_version = {GD_OP_VERSION_3_7_10},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .tags = {"disperse"},
+ .description = "Enable/Disable eager lock for regular files on a "
+ "disperse volume. If a fop takes a lock and completes "
+ "its operation, it waits for next 1 second before "
+ "releasing the lock, to see if the lock can be reused "
+ "for next fop from the same client. If ec finds any lock "
+ "contention within 1 second it releases the lock "
+ "immediately before time expires. This improves the "
+ "performance of file operations. However, as it takes "
+ "lock on first brick, for few operations like read, "
+ "discovery of lock contention might take long time and "
+ "can actually degrade the performance. If eager lock is "
+ "disabled, lock will be released as soon as fop "
+ "completes."},
+ {.key = {"other-eager-lock"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .op_version = {GD_OP_VERSION_3_13_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .tags = {"disperse"},
+ .description = "It's equivalent to the eager-lock option but for non "
+ "regular files."},
+ {.key = {"eager-lock-timeout"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 60,
+ .default_value = "1",
+ .op_version = {GD_OP_VERSION_4_0_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .tags = {"disperse", "locks", "timeout"},
+ .description = "Maximum time (in seconds) that a lock on an inode is "
+ "kept held if no new operations on the inode are "
+ "received."},
+ {.key = {"other-eager-lock-timeout"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 60,
+ .default_value = "1",
+ .op_version = {GD_OP_VERSION_4_0_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .tags = {"disperse", "locks", "timeout"},
+ .description = "It's equivalent to eager-lock-timeout option but for "
+ "non regular files."},
+ {
+ .key = {"background-heals"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0, /*Disabling background heals*/
+ .max = 256,
+ .default_value = "8",
+ .op_version = {GD_OP_VERSION_3_7_3},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .tags = {"disperse"},
+ .description = "This option can be used to control number of parallel"
+ " heals",
},
- { }
+ {
+ .key = {"heal-wait-qlength"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .max =
+ 65536, /*Around 100MB as of now with sizeof(ec_fop_data_t) at 1800*/
+ .default_value = "128",
+ .op_version = {GD_OP_VERSION_3_7_3},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .tags = {"disperse"},
+ .description = "This option can be used to control number of heals"
+ " that can wait",
+ },
+ {.key = {"heal-timeout"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 60,
+ .max = INT_MAX,
+ .default_value = "600",
+ .op_version = {GD_OP_VERSION_3_7_3},
+ .flags = OPT_FLAG_SETTABLE,
+ .tags = {"disperse"},
+ .description = "time interval for checking the need to self-heal "
+ "in self-heal-daemon"},
+ {
+ .key = {"read-policy"},
+ .type = GF_OPTION_TYPE_STR,
+ .value = {"round-robin", "gfid-hash"},
+ .default_value = "gfid-hash",
+ .op_version = {GD_OP_VERSION_3_7_6},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .tags = {"disperse"},
+ .description =
+ "inode-read fops happen only on 'k' number of bricks in"
+ " n=k+m disperse subvolume. 'round-robin' selects the read"
+ " subvolume using round-robin algo. 'gfid-hash' selects read"
+ " subvolume based on hash of the gfid of that file/directory.",
+ },
+ {.key = {"shd-max-threads"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 64,
+ .default_value = "1",
+ .op_version = {GD_OP_VERSION_3_9_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"disperse"},
+ .description = "Maximum number of parallel heals SHD can do per local "
+ "brick. This can substantially lower heal times, "
+ "but can also crush your bricks if you don't have "
+ "the storage hardware to support this."},
+ {.key = {"shd-wait-qlength"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 65536,
+ .default_value = "1024",
+ .op_version = {GD_OP_VERSION_3_9_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"disperse"},
+ .description = "This option can be used to control number of heals"
+ " that can wait in SHD per subvolume"},
+ {.key = {"cpu-extensions"},
+ .type = GF_OPTION_TYPE_STR,
+ .value = {"none", "auto", "x64", "sse", "avx"},
+ .default_value = "auto",
+ .op_version = {GD_OP_VERSION_3_9_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .tags = {"disperse"},
+ .description = "force the cpu extensions to be used to accelerate the "
+ "galois field computations."},
+ {.key = {"self-heal-window-size"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 1024,
+ .default_value = "1",
+ .op_version = {GD_OP_VERSION_3_11_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .tags = {"disperse"},
+ .description = "Maximum number blocks(128KB) per file for which "
+ "self-heal process would be applied simultaneously."},
+ {.key = {"optimistic-change-log"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .op_version = {GD_OP_VERSION_3_10_1},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT,
+ .tags = {"disperse"},
+ .description = "Set/Unset dirty flag for every update fop at the start"
+ "of the fop. If OFF, this option impacts performance of"
+ "entry operations or metadata operations as it will"
+ "set dirty flag at the start and unset it at the end of"
+ "ALL update fop. If ON and all the bricks are good,"
+ "dirty flag will be set at the start only for file fops"
+ "For metadata and entry fops dirty flag will not be set"
+ "at the start, if all the bricks are good. This does"
+ "not impact performance for metadata operations and"
+ "entry operation but has a very small window to miss"
+ "marking entry as dirty in case it is required to be"
+ "healed"},
+ {.key = {"parallel-writes"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "This controls if writes can be wound in parallel as long"
+ "as it doesn't modify same stripes"},
+ {.key = {"stripe-cache"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0, /*Disabling stripe_cache*/
+ .max = EC_STRIPE_CACHE_MAX_SIZE,
+ .default_value = "4",
+ .description = "This option will keep the last stripe of write fop"
+ "in memory. If next write falls in this stripe, we need"
+ "not to read it again from backend and we can save READ"
+ "fop going over the network. This will improve performance,"
+ "specially for sequential writes. However, this will also"
+ "lead to extra memory consumption, maximum "
+ "(cache size * stripe size) Bytes per open file."},
+ {
+ .key = {"quorum-count"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "0",
+ .description =
+ "This option can be used to define how many successes on"
+ "the bricks constitute a success to the application. This"
+ " count should be in the range"
+ "[disperse-data-count, disperse-count] (inclusive)",
+ },
+ {
+ .key = {"ec-read-mask"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = NULL,
+ .description = "This option can be used to choose which bricks can be"
+ " used for reading data/metadata of a file/directory",
+ },
+ {
+ .key = {NULL},
+ },
+};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .notify = notify,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1},
+ .dumpops = &dumpops,
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "disperse",
+ .category = GF_MAINTAINED,
};
diff --git a/xlators/cluster/ec/src/ec.h b/xlators/cluster/ec/src/ec.h
index 54c13077993..6f6de6d5981 100644
--- a/xlators/cluster/ec/src/ec.h
+++ b/xlators/cluster/ec/src/ec.h
@@ -11,42 +11,24 @@
#ifndef __EC_H__
#define __EC_H__
-#include "xlator.h"
-#include "timer.h"
-#include "ec-heald.h"
-#include "libxlator.h"
+#include "ec-method.h"
-#define EC_XATTR_PREFIX "trusted.ec."
-#define EC_XATTR_CONFIG EC_XATTR_PREFIX"config"
-#define EC_XATTR_SIZE EC_XATTR_PREFIX"size"
-#define EC_XATTR_VERSION EC_XATTR_PREFIX"version"
-#define EC_XATTR_HEAL EC_XATTR_PREFIX"heal"
-#define EC_XATTR_DIRTY EC_XATTR_PREFIX"dirty"
+#define EC_XATTR_PREFIX "trusted.ec."
+#define EC_XATTR_CONFIG EC_XATTR_PREFIX "config"
+#define EC_XATTR_SIZE EC_XATTR_PREFIX "size"
+#define EC_XATTR_VERSION EC_XATTR_PREFIX "version"
+#define EC_XATTR_HEAL EC_XATTR_PREFIX "heal"
+#define EC_XATTR_HEAL_NEW EC_XATTR_PREFIX "heal-new"
+#define EC_XATTR_DIRTY EC_XATTR_PREFIX "dirty"
+#define EC_STRIPE_CACHE_MAX_SIZE 10
+#define EC_VERSION_SIZE 2
+#define EC_SHD_INODE_LRU_LIMIT 10
+
+#define EC_MAX_FRAGMENTS EC_METHOD_MAX_FRAGMENTS
+/* The maximum number of nodes is derived from the maximum allowed fragments
+ * using the rule that redundancy cannot be equal or greater than the number
+ * of fragments.
+ */
+#define EC_MAX_NODES min(EC_MAX_FRAGMENTS * 2 - 1, EC_METHOD_MAX_NODES)
-struct _ec
-{
- xlator_t * xl;
- int32_t nodes;
- int32_t bits_for_nodes;
- int32_t fragments;
- int32_t redundancy;
- uint32_t fragment_size;
- uint32_t stripe_size;
- int32_t up;
- uint32_t idx;
- uint32_t xl_up_count;
- uintptr_t xl_up;
- uint32_t xl_notify_count;
- uintptr_t xl_notify;
- uintptr_t node_mask;
- xlator_t ** xl_list;
- gf_lock_t lock;
- gf_timer_t * timer;
- struct mem_pool * fop_pool;
- struct mem_pool * cbk_pool;
- struct mem_pool * lock_pool;
- ec_self_heald_t shd;
- char vol_uuid[UUID_SIZE + 1];
- dict_t *leaf_to_subvolid;
-};
#endif /* __EC_H__ */
diff --git a/xlators/cluster/ha/src/Makefile.am b/xlators/cluster/ha/src/Makefile.am
deleted file mode 100644
index 5c1364b7f9d..00000000000
--- a/xlators/cluster/ha/src/Makefile.am
+++ /dev/null
@@ -1,16 +0,0 @@
-xlator_LTLIBRARIES = ha.la
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/testing/cluster
-
-ha_la_LDFLAGS = -module -avoid-version
-
-ha_la_SOURCES = ha-helpers.c ha.c
-ha_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-
-noinst_HEADERS = ha.h
-
-AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
-
-AM_CFLAGS = -Wall $(GF_CFLAGS)
-
-CLEANFILES =
-
diff --git a/xlators/cluster/ha/src/ha-helpers.c b/xlators/cluster/ha/src/ha-helpers.c
deleted file mode 100644
index 19be1ed2773..00000000000
--- a/xlators/cluster/ha/src/ha-helpers.c
+++ /dev/null
@@ -1,194 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-#include "xlator.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "dict.h"
-#include "compat-errno.h"
-#include "ha.h"
-
-#define HA_TRANSPORT_NOTCONN(_ret, _errno, _fd) \
- ((_ret == -1) && (_fd ? (_errno == EBADFD):(_errno == ENOTCONN)))
-
-int ha_alloc_init_fd (call_frame_t *frame, fd_t *fd)
-{
- ha_local_t *local = NULL;
- int i = -1;
- ha_private_t *pvt = NULL;
- int child_count = 0;
- int ret = -1;
- hafd_t *hafdp = NULL;
- xlator_t *this = NULL;
- uint64_t tmp_hafdp = 0;
-
- this = frame->this;
- local = frame->local;
- pvt = this->private;
- child_count = pvt->child_count;
-
- if (local == NULL) {
- ret = fd_ctx_get (fd, this, &tmp_hafdp);
- if (ret < 0) {
- goto out;
- }
- hafdp = (hafd_t *)(long)tmp_hafdp;
- local = frame->local = GF_CALLOC (1, sizeof (*local),
- gf_ha_mt_ha_local_t);
- if (local == NULL) {
- ret = -ENOMEM;
- goto out;
- }
- local->state = GF_CALLOC (1, child_count,
- gf_ha_mt_child_count);
- if (local->state == NULL) {
- ret = -ENOMEM;
- goto out;
- }
-
- /* take care of the preferred subvolume */
- if (pvt->pref_subvol == -1)
- local->active = hafdp->active;
- else
- local->active = pvt->pref_subvol;
-
- LOCK (&hafdp->lock);
- memcpy (local->state, hafdp->fdstate, child_count);
- UNLOCK (&hafdp->lock);
-
- /* in case the preferred subvolume is down */
- if ((local->active != -1) && (local->state[local->active] == 0))
- local->active = -1;
-
- for (i = 0; i < child_count; i++) {
- if (local->state[i]) {
- if (local->active == -1)
- local->active = i;
- local->tries++;
- }
- }
- if (local->active == -1) {
- ret = -ENOTCONN;
- goto out;
- }
- local->fd = fd_ref (fd);
- }
- ret = 0;
-out:
- return ret;
-}
-
-int ha_handle_cbk (call_frame_t *frame, void *cookie, int op_ret, int op_errno)
-{
- xlator_t *xl = NULL;
- ha_private_t *pvt = NULL;
- xlator_t **children = NULL;
- int prev_child = -1;
- hafd_t *hafdp = NULL;
- int ret = -1;
- call_stub_t *stub = NULL;
- ha_local_t *local = NULL;
- uint64_t tmp_hafdp = 0;
-
- xl = frame->this;
- pvt = xl->private;
- children = pvt->children;
- prev_child = (long) cookie;
- local = frame->local;
-
- if (op_ret == -1) {
- gf_log (xl->name, GF_LOG_ERROR ,"(child=%s) (op_ret=%d op_errno=%s)",
- children[prev_child]->name, op_ret, strerror (op_errno));
- }
-
- if (HA_TRANSPORT_NOTCONN (op_ret, op_errno, (local->fd))) {
- ret = 0;
- if (local->fd) {
- ret = fd_ctx_get (local->fd, xl, &tmp_hafdp);
- }
- hafdp = (hafd_t *)(long)tmp_hafdp;
- if (ret == 0) {
- if (local->fd) {
- LOCK(&hafdp->lock);
- hafdp->fdstate[prev_child] = 0;
- UNLOCK(&hafdp->lock);
- }
- local->tries--;
- if (local->tries != 0) {
- while (1) {
- local->active = (local->active + 1) % pvt->child_count;
- if (local->state[local->active])
- break;
- }
- stub = local->stub;
- local->stub = NULL;
- call_resume (stub);
- return -1;
- }
- }
- }
- if (local->stub) {
- call_stub_destroy (local->stub);
- local->stub = NULL;
- }
-
- if (local->fd) {
- GF_FREE (local->state);
- local->state = NULL;
-
- fd_unref (local->fd);
- local->fd = NULL;
- }
- return 0;
-}
-
-int ha_alloc_init_inode (call_frame_t *frame, inode_t *inode)
-{
- int i = -1;
- ha_private_t *pvt = NULL;
- xlator_t *xl = NULL;
- int ret = -1;
- ha_local_t *local = NULL;
- uint64_t tmp_state = 0;
-
- xl = frame->this;
- pvt = xl->private;
- local = frame->local;
-
- if (local == NULL) {
- local = frame->local = GF_CALLOC (1, sizeof (*local),
- gf_ha_mt_ha_local_t);
- if (local == NULL) {
- ret = -ENOMEM;
- goto out;
- }
- local->active = pvt->pref_subvol;
- ret = inode_ctx_get (inode, xl, &tmp_state);
- if (ret < 0) {
- goto out;
- }
- local->state = (char *)(long)tmp_state;
- if (local->active != -1 && local->state[local->active] == 0)
- local->active = -1;
- for (i = 0; i < pvt->child_count; i++) {
- if (local->state[i]) {
- if (local->active == -1)
- local->active = i;
- local->tries++;
- }
- }
- if (local->active == -1) {
- ret = -ENOTCONN;
- goto out;
- }
- }
- ret = 0;
-out:
- return ret;
-}
diff --git a/xlators/cluster/ha/src/ha-mem-types.h b/xlators/cluster/ha/src/ha-mem-types.h
deleted file mode 100644
index e5e97d237dc..00000000000
--- a/xlators/cluster/ha/src/ha-mem-types.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef __HA_MEM_TYPES_H__
-#define __HA_MEM_TYPES_H__
-
-#include "mem-types.h"
-
-enum gf_ha_mem_types_ {
- gf_ha_mt_ha_local_t = gf_common_mt_end + 1,
- gf_ha_mt_hafd_t,
- gf_ha_mt_char,
- gf_ha_mt_child_count,
- gf_ha_mt_xlator_t,
- gf_ha_mt_ha_private_t,
- gf_ha_mt_end
-};
-#endif
-
diff --git a/xlators/cluster/ha/src/ha.c b/xlators/cluster/ha/src/ha.c
deleted file mode 100644
index 742ec46f107..00000000000
--- a/xlators/cluster/ha/src/ha.c
+++ /dev/null
@@ -1,4013 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-/* generate errors randomly, code is simple now, better alogorithm
- * can be written to decide what error to be returned and when
- */
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "xlator.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "dict.h"
-#include "compat-errno.h"
-#include "ha.h"
-
-/*
- * TODO:
- * - dbench fails if ha over server side afr
- * - lock calls - lock on all subvols.
- * - support preferred-subvolume option. code already there.
- * - do not alloc the call-stub in case only one subvol is up.
- */
-
-void
-ha_local_wipe (ha_local_t *local)
-{
- if (local->stub) {
- call_stub_destroy (local->stub);
- local->stub = NULL;
- }
-
- if (local->state) {
- GF_FREE (local->state);
- local->state = NULL;
- }
-
- if (local->dict) {
- dict_unref (local->dict);
- local->dict = NULL;
- }
-
- loc_wipe (&local->loc);
-
- if (local->fd) {
- fd_unref (local->fd);
- local->fd = NULL;
- }
-
- if (local->inode) {
- inode_unref (local->inode);
- local->inode = NULL;
- }
-
- GF_FREE (local);
- return;
-}
-
-
-int
-ha_forget (xlator_t *this,
- inode_t *inode)
-{
- uint64_t stateino = 0;
- char *state = NULL;
- if (!inode_ctx_del (inode, this, &stateino)) {
- state = ((char *)(long)stateino);
- GF_FREE (state);
- }
-
- return 0;
-
-}
-
-int32_t
-ha_lookup_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- dict_t *dict,
- struct iatt *postparent)
-{
- ha_local_t *local = NULL;
- ha_private_t *pvt = NULL;
- int child_count = 0, i = 0, callcnt = 0;
- char *state = NULL;
- call_frame_t *prev_frame = NULL;
- xlator_t **children = NULL;
- uint64_t tmp_state = 0;
-
- local = frame->local;
- pvt = this->private;
- child_count = pvt->child_count;
- prev_frame = cookie;
- children = pvt->children;
-
- for (i = 0; i < child_count; i++) {
- if (pvt->children[i] == prev_frame->this)
- break;
- }
- if ((op_ret == -1) && (op_errno != ENOENT)) {
- gf_log (this->name, GF_LOG_ERROR, "(child=%s) (op_ret=%d op_errno=%s)",
- children[i]->name, op_ret, strerror (op_errno));
- }
- inode_ctx_get (local->inode, this, &tmp_state);
- state = (char *)(long)tmp_state;
-
- LOCK (&frame->lock);
- if (local->revalidate == 1) {
- if ((!op_ret) != state[i]) {
- local->revalidate_error = 1;
- gf_log (this->name, GF_LOG_DEBUG, "revalidate error on %s",
- pvt->children[i]->name);
- }
- } else {
- if (op_ret == 0) {
- state[i] = 1;
- }
- }
- if (local->op_ret == -1 && op_ret == 0) {
- local->op_ret = 0;
- local->buf = *buf;
- local->postparent = *postparent;
- if (dict)
- local->dict = dict_ref (dict);
- }
- if (op_ret == -1 && op_ret != ENOTCONN)
- local->op_errno = op_errno;
- callcnt = --local->call_count;
- UNLOCK (&frame->lock);
-
- if (callcnt == 0) {
- dict_t *ctx = local->dict;
- inode_t *inode = local->inode;
- if (local->revalidate_error == 1) {
- local->op_ret = -1;
- local->op_errno = EIO;
- gf_log (this->name, GF_LOG_DEBUG, "revalidate error, returning EIO");
- }
- STACK_UNWIND (frame,
- local->op_ret,
- local->op_errno,
- inode,
- &local->buf,
- ctx,
- &local->postparent);
- if (inode)
- inode_unref (inode);
- if (ctx)
- dict_unref (ctx);
- }
- return 0;
-}
-
-int32_t
-ha_lookup (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *xattr_req)
-{
- ha_local_t *local = NULL;
- ha_private_t *pvt = NULL;
- int child_count = 0, i = 0;
- char *state = NULL;
- xlator_t **children = NULL;
- int ret = -1;
- int32_t op_errno = EINVAL;
-
- local = frame->local;
- pvt = this->private;
- child_count = pvt->child_count;
- children = pvt->children;
-
- frame->local = local = GF_CALLOC (1, sizeof (*local),
- gf_ha_mt_ha_local_t);
- if (!local) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- op_errno = ENOMEM;
- goto unwind;
- }
-
- child_count = pvt->child_count;
- local->inode = inode_ref (loc->inode);
-
- ret = inode_ctx_get (loc->inode, this, NULL);
- if (ret) {
- state = GF_CALLOC (1, child_count, gf_ha_mt_child_count);
- if (state == NULL) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- op_errno = ENOMEM;
- goto unwind;
- }
-
- inode_ctx_put (loc->inode, this, (uint64_t)(long)state);
- } else
- local->revalidate = 1;
-
- local->op_ret = -1;
- local->op_errno = ENOTCONN;
- local->call_count = child_count;
-
- for (i = 0; i < child_count; i++) {
- STACK_WIND (frame,
- ha_lookup_cbk,
- children[i],
- children[i]->fops->lookup,
- loc,
- xattr_req);
- }
- return 0;
-
-unwind:
- local = frame->local;
- frame->local = NULL;
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL, NULL);
-
- ha_local_wipe (local);
- return 0;
-}
-
- int32_t
-ha_stat_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *buf)
-{
- int ret = -1;
-
- ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
-
- if (ret == 0) {
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- }
- return 0;
-}
-
-int32_t
-ha_stat (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
-{
- ha_local_t *local = NULL;
- int op_errno = ENOTCONN;
-
- op_errno = ha_alloc_init_inode (frame, loc->inode);
- if (op_errno < 0) {
- op_errno = -op_errno;
- goto err;
- }
- local = frame->local;
- local->stub = fop_stat_stub (frame, ha_stat, loc);
-
- STACK_WIND_COOKIE (frame,
- ha_stat_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->stat,
- loc);
- return 0;
-err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
- return 0;
-}
-
-int32_t
-ha_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *statpre,
- struct iatt *statpost)
-{
- int ret = -1;
-
- ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
-
- if (ret == 0) {
- STACK_UNWIND (frame, op_ret, op_errno, statpre, statpost);
- }
- return 0;
-}
-
-
-int32_t
-ha_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf,
- int32_t valid)
-{
- ha_local_t *local = NULL;
- int op_errno = 0;
-
- op_errno = ha_alloc_init_inode (frame, loc->inode);
- if (op_errno < 0) {
- op_errno = -op_errno;
- goto err;
- }
- local = frame->local;
- local->stub = fop_setattr_stub (frame, ha_setattr, loc, stbuf, valid);
-
- STACK_WIND_COOKIE (frame,
- ha_setattr_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->setattr,
- loc, stbuf, valid);
- return 0;
-err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
- return 0;
-}
-
-
-int32_t
-ha_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
- int32_t valid)
-{
- ha_local_t *local = NULL;
- int op_errno = 0;
-
- op_errno = ha_alloc_init_fd (frame, fd);
- if (op_errno < 0) {
- op_errno = -op_errno;
- goto err;
- }
- local = frame->local;
- local->stub = fop_fsetattr_stub (frame, ha_fsetattr, fd, stbuf, valid);
-
- STACK_WIND_COOKIE (frame,
- ha_setattr_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->fsetattr,
- fd, stbuf, valid);
- return 0;
-err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
- return 0;
-}
-
-
-int32_t
-ha_truncate_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf)
-{
- int ret = -1;
-
- ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
- if (ret == 0) {
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- prebuf,
- postbuf);
- }
- return 0;
-}
-
-int32_t
-ha_truncate (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- off_t offset)
-{
- ha_local_t *local = NULL;
- int op_errno = 0;
-
- op_errno = ha_alloc_init_inode (frame, loc->inode);
- if (op_errno < 0) {
- op_errno = -op_errno;
- goto err;
- }
- local = frame->local;
- local->stub = fop_truncate_stub (frame, ha_truncate, loc, offset);
- if (!local->stub) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- STACK_WIND_COOKIE (frame,
- ha_truncate_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->truncate,
- loc,
- offset);
- return 0;
-err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
- return 0;
-}
-
- int32_t
-ha_ftruncate_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf)
-{
- int ret = -1;
-
- ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
-
- if (ret == 0) {
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- prebuf, postbuf);
- }
- return 0;
-}
-
-int32_t
-ha_ftruncate (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- off_t offset)
-{
- ha_local_t *local = NULL;
- int op_errno = 0;
-
- op_errno = ha_alloc_init_fd (frame, fd);
- if (op_errno < 0) {
- op_errno = -op_errno;
- goto err;
- }
- local = frame->local;
- local->stub = fop_ftruncate_stub (frame, ha_ftruncate, fd, offset);
- if (!local->stub) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- STACK_WIND_COOKIE (frame,
- ha_ftruncate_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->ftruncate,
- fd,
- offset);
- return 0;
-err:
- local = frame->local;
- frame->local = NULL;
-
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
-
- ha_local_wipe (local);
- return 0;
-}
-
-int32_t
-ha_access_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int ret = -1;
-
- ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
-
- if (ret == 0) {
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- }
- return 0;
-}
-
-int32_t
-ha_access (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t mask)
-{
- ha_local_t *local = NULL;
- int op_errno = 0;
-
- op_errno = ha_alloc_init_inode (frame, loc->inode);
- if (op_errno < 0) {
- op_errno = -op_errno;
- goto err;
- }
- local = frame->local;
- local->stub = fop_access_stub (frame, ha_access, loc, mask);
- if (!local->stub) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- STACK_WIND_COOKIE (frame,
- ha_access_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->access,
- loc,
- mask);
- return 0;
-err:
- local = frame->local;
- frame->local = NULL;
-
- STACK_UNWIND (frame, -1, op_errno);
-
- ha_local_wipe (local);
- return 0;
-}
-
-
- int32_t
-ha_readlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- const char *path,
- struct iatt *sbuf)
-{
- int ret = -1;
-
- ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
-
- if (ret == 0) {
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- path,
- sbuf);
- }
- return 0;
-}
-
-int32_t
-ha_readlink (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- size_t size)
-{
- ha_local_t *local = frame->local;
- int op_errno = 0;
-
- op_errno = ha_alloc_init_inode (frame, loc->inode);
- if (op_errno < 0) {
- op_errno = -op_errno;
- goto err;
- }
- local = frame->local;
- local->stub = fop_readlink_stub (frame, ha_readlink, loc, size);
-
- STACK_WIND_COOKIE (frame,
- ha_readlink_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->readlink,
- loc,
- size);
- return 0;
-err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
- return 0;
-}
-
-int
-ha_mknod_lookup_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- dict_t *dict,
- struct iatt *postparent)
-{
- ha_local_t *local = NULL;
- ha_private_t *pvt = NULL;
- char *stateino = NULL;
- int child_count = 0, i = 0, cnt = 0, ret = 0;
- call_frame_t *prev_frame = NULL;
- xlator_t **children = NULL;
- uint64_t tmp_stateino = 0;
-
- local = frame->local;
- pvt = this->private;
- child_count = pvt->child_count;
- prev_frame = cookie;
- children = pvt->children;
-
- for (i = 0; i < child_count; i++)
- if (prev_frame->this == children[i])
- break;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "(path=%s) (op_ret=%d op_errno=%d)",
- local->stub->args.mknod.loc.path, op_ret, op_errno);
- }
- ret = inode_ctx_get (local->stub->args.mknod.loc.inode,
- this, &tmp_stateino);
- stateino = (char *)(long)tmp_stateino;
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "unwind(-1), inode_ctx_get() error");
- /* It is difficult to handle this error at this stage
- * as we still expect more cbks, we can't return as
- * of now
- */
- } else if (op_ret == 0) {
- stateino[i] = 1;
- }
- LOCK (&frame->lock);
- cnt = --local->call_count;
- UNLOCK (&frame->lock);
-
- if (cnt == 0) {
- call_stub_t *stub = local->stub;
- GF_FREE (local->state);
- STACK_UNWIND (frame,
- local->op_ret,
- local->op_errno,
- local->stub->args.mknod.loc.inode,
- &local->buf, &local->preparent,
- &local->postparent);
- call_stub_destroy (stub);
- }
- return 0;
-}
-
-int32_t
-ha_mknod_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- ha_local_t *local = NULL;
- ha_private_t *pvt = NULL;
- char *stateino = NULL;
- int child_count = 0, i = 0, cnt = 0, ret = 0;
- call_frame_t *prev_frame = NULL;
- xlator_t **children = NULL;
- uint64_t tmp_stateino = 0;
-
- local = frame->local;
- pvt = this->private;
- child_count = pvt->child_count;
- prev_frame = cookie;
- children = pvt->children;
-
- for (i = 0; i < child_count; i++)
- if (prev_frame->this == children[i])
- break;
-
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_ERROR, "(path=%s) (op_ret=%d op_errno=%d)", local->stub->args.mknod.loc.path, op_ret, op_errno);
- }
-
- ret = inode_ctx_get (local->stub->args.mknod.loc.inode,
- this, &tmp_stateino);
- stateino = (char *)(long)tmp_stateino;
-
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "inode_ctx_get() error");
- /* FIXME: handle the case */
- }
- if (op_ret == 0) {
- stateino[i] = 1;
- local->op_ret = 0;
- local->first_success = 1;
- local->buf = *buf;
- local->preparent = *preparent;
- local->postparent = *postparent;
- }
- cnt = --local->call_count;
- for (i = local->active + 1; i < child_count; i++) {
- if (local->state[i])
- break;
- }
-
- if (cnt == 0 || i == child_count) {
- call_stub_t *stub = local->stub;
- GF_FREE (local->state);
- stub = local->stub;
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- local->stub->args.mknod.loc.inode, &local->buf,
- &local->preparent, &local->postparent);
- call_stub_destroy (stub);
- return 0;
- }
-
- local->active = i;
-
- if (local->first_success == 0) {
- STACK_WIND (frame,
- ha_mknod_cbk,
- children[i],
- children[i]->fops->mknod,
- &local->stub->args.mknod.loc,
- local->stub->args.mknod.mode,
- local->stub->args.mknod.rdev);
- return 0;
- }
- cnt = local->call_count;
-
- for (; i < child_count; i++) {
- if (local->state[i]) {
- STACK_WIND (frame,
- ha_mknod_lookup_cbk,
- children[i],
- children[i]->fops->lookup,
- &local->stub->args.mknod.loc,
- 0);
- if (--cnt == 0)
- break;
- }
- }
- return 0;
-}
-
-int32_t
-ha_mknod (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- mode_t mode,
- dev_t rdev)
-{
- ha_local_t *local = NULL;
- ha_private_t *pvt = NULL;
- int child_count = 0, i = 0;
- char *stateino = NULL;
- int32_t op_errno = EINVAL;
-
- local = frame->local;
- pvt = this->private;
- child_count = pvt->child_count;
-
- frame->local = local = GF_CALLOC (1, sizeof (*local),
- gf_ha_mt_ha_local_t);
- if (!local) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- op_errno = ENOMEM;
- goto err;
- }
-
- local->stub = fop_mknod_stub (frame, ha_mknod, loc, mode, rdev);
- if (!local->stub) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- op_errno = ENOMEM;
- goto err;
- }
-
- local->op_ret = -1;
- local->op_errno = ENOTCONN;
- local->state = GF_CALLOC (1, child_count, gf_ha_mt_char);
- if (!local->state) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- op_errno = ENOMEM;
- goto err;
- }
-
- memcpy (local->state, pvt->state, child_count);
- local->active = -1;
-
- stateino = GF_CALLOC (1, child_count, gf_ha_mt_char);
- if (!stateino) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- op_errno = ENOMEM;
- goto err;
- }
-
- inode_ctx_put (loc->inode, this, (uint64_t)(long)stateino);
-
- for (i = 0; i < child_count; i++) {
- if (local->state[i]) {
- local->call_count++;
- if (local->active == -1)
- local->active = i;
- }
- }
-
- STACK_WIND (frame,
- ha_mknod_cbk,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->mknod,
- loc, mode, rdev);
- return 0;
-
-err:
- local = frame->local;
- frame->local = NULL;
-
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL, NULL);
- ha_local_wipe (local);
- return 0;
-}
-
-
-int
-ha_mkdir_lookup_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- dict_t *dict,
- struct iatt *postparent)
-{
- ha_local_t *local = NULL;
- ha_private_t *pvt = NULL;
- char *stateino = NULL;
- int child_count = 0, i = 0, cnt = 0;
- call_frame_t *prev_frame = NULL;
- xlator_t **children = NULL;
- uint64_t tmp_stateino = 0;
-
- local = frame->local;
- pvt = this->private;
- child_count = pvt->child_count;
- prev_frame = cookie;
- children = pvt->children;
-
- for (i = 0; i < child_count; i++)
- if (prev_frame->this == children[i])
- break;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR, "(path=%s) (op_ret=%d op_errno=%d)", local->stub->args.mkdir.loc.path, op_ret, op_errno);
- }
- inode_ctx_get (local->stub->args.mkdir.loc.inode,
- this, &tmp_stateino);
- stateino = (char *)(long)tmp_stateino;
-
- if (op_ret == 0)
- stateino[i] = 1;
-
- LOCK (&frame->lock);
- cnt = --local->call_count;
- UNLOCK (&frame->lock);
-
- if (cnt == 0) {
- call_stub_t *stub = local->stub;
- GF_FREE (local->state);
- STACK_UNWIND (frame,
- local->op_ret,
- local->op_errno,
- local->stub->args.mkdir.loc.inode, &local->buf,
- &local->preparent, &local->postparent);
- call_stub_destroy (stub);
- }
- return 0;
-}
-
-int32_t
-ha_mkdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- ha_local_t *local = NULL;
- ha_private_t *pvt = NULL;
- char *stateino = NULL;
- int child_count = 0, i = 0, cnt = 0;
- call_frame_t *prev_frame = NULL;
- xlator_t **children = NULL;
- uint64_t tmp_stateino = 0;
-
- local = frame->local;
- pvt = this->private;
- child_count = pvt->child_count;
- prev_frame = cookie;
- children = pvt->children;
-
- for (i = 0; i < child_count; i++)
- if (prev_frame->this == children[i])
- break;
-
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_ERROR, "(path=%s) (op_ret=%d op_errno=%d)", local->stub->args.mkdir.loc.path, op_ret, op_errno);
- }
-
- inode_ctx_get (local->stub->args.mkdir.loc.inode,
- this, &tmp_stateino);
- stateino = (char *)(long)tmp_stateino;
-
- if (op_ret == 0) {
- stateino[i] = 1;
- local->op_ret = 0;
- local->first_success = 1;
- local->buf = *buf;
- local->preparent = *preparent;
- local->postparent = *postparent;
- }
- cnt = --local->call_count;
- for (i = local->active + 1; i < child_count; i++) {
- if (local->state[i])
- break;
- }
-
- if (cnt == 0 || i == child_count) {
- call_stub_t *stub = local->stub;
- GF_FREE (local->state);
- stub = local->stub;
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- local->stub->args.mkdir.loc.inode, &local->buf,
- &local->preparent, &local->postparent);
- call_stub_destroy (stub);
- return 0;
- }
-
- local->active = i;
-
- if (local->first_success == 0) {
- STACK_WIND (frame,
- ha_mkdir_cbk,
- children[i],
- children[i]->fops->mkdir,
- &local->stub->args.mkdir.loc,
- local->stub->args.mkdir.mode);
- return 0;
- }
- cnt = local->call_count;
-
- for (; i < child_count; i++) {
- if (local->state[i]) {
- STACK_WIND (frame,
- ha_mkdir_lookup_cbk,
- children[i],
- children[i]->fops->lookup,
- &local->stub->args.mkdir.loc,
- 0);
- if (--cnt == 0)
- break;
- }
- }
- return 0;
-}
-
-int32_t
-ha_mkdir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- mode_t mode)
-{
- ha_local_t *local = NULL;
- ha_private_t *pvt = NULL;
- int child_count = 0, i = 0;
- char *stateino = NULL;
- int32_t op_errno = EINVAL;
-
- local = frame->local;
- pvt = this->private;
- child_count = pvt->child_count;
-
- frame->local = local = GF_CALLOC (1, sizeof (*local),
- gf_ha_mt_ha_local_t);
- if (!frame->local) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- op_errno = ENOMEM;
- goto err;
- }
-
- local->stub = fop_mkdir_stub (frame, ha_mkdir, loc, mode);
- if (!local->stub) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- op_errno = ENOMEM;
- goto err;
- }
-
- local->op_ret = -1;
- local->op_errno = ENOTCONN;
- local->state = GF_CALLOC (1, child_count, gf_ha_mt_char);
- if (!local->state) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- op_errno = ENOMEM;
- goto err;
- }
-
- memcpy (local->state, pvt->state, child_count);
- local->active = -1;
-
- stateino = GF_CALLOC (1, child_count, gf_ha_mt_char);
- if (!stateino) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- op_errno = ENOMEM;
- goto err;
- }
-
- inode_ctx_put (loc->inode, this, (uint64_t)(long)stateino);
- for (i = 0; i < child_count; i++) {
- if (local->state[i]) {
- local->call_count++;
- if (local->active == -1)
- local->active = i;
- }
- }
-
- STACK_WIND (frame,
- ha_mkdir_cbk,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->mkdir,
- loc, mode);
- return 0;
-err:
- local = frame->local;
- frame->local = NULL;
-
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL, NULL);
- ha_local_wipe (local);
- return 0;
-}
-
- int32_t
-ha_unlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- int ret = -1;
-
- ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
- if (ret == 0) {
- STACK_UNWIND (frame, op_ret, op_errno, preparent, postparent);
- }
- return 0;
-}
-
-int32_t
-ha_unlink (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
-{
- ha_local_t *local = NULL;
- int op_errno = 0;
-
- op_errno = ha_alloc_init_inode (frame, loc->inode);
-
- if (op_errno < 0) {
- op_errno = -op_errno;
- goto err;
- }
- local = frame->local;
- local->stub = fop_unlink_stub (frame, ha_unlink, loc);
- if (!local->stub) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- op_errno = ENOMEM;
- goto err;
- }
-
- STACK_WIND_COOKIE (frame,
- ha_unlink_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->unlink,
- loc);
- return 0;
-err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
- return 0;
-}
-
- int32_t
-ha_rmdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- int ret = -1;
-
- ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
-
- if (ret == 0) {
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- preparent,
- postparent);
- }
- return 0;
-}
-
-int32_t
-ha_rmdir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
-{
- ha_local_t *local = frame->local;
- int op_errno = 0;
-
- op_errno = ha_alloc_init_inode (frame, loc->inode);
- if (op_errno < 0) {
- op_errno = -op_errno;
- goto err;
- }
- local = frame->local;
- local->stub = fop_rmdir_stub (frame, ha_rmdir, loc);
- if (!local->stub) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- STACK_WIND_COOKIE (frame,
- ha_rmdir_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->rmdir,
- loc);
- return 0;
-err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
- return 0;
-}
-
-
-int
-ha_symlink_lookup_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- dict_t *dict,
- struct iatt *postparent)
-{
- ha_local_t *local = NULL;
- ha_private_t *pvt = NULL;
- char *stateino = NULL;
- int child_count = 0, i = 0, cnt = 0;
- call_frame_t *prev_frame = NULL;
- xlator_t **children = NULL;
- uint64_t tmp_stateino = 0;
-
- local = frame->local;
- pvt = this->private;
- child_count = pvt->child_count;
- prev_frame = cookie;
- children = pvt->children;
-
- for (i = 0; i < child_count; i++)
- if (prev_frame->this == children[i])
- break;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR, "(path=%s) (op_ret=%d op_errno=%d)", local->stub->args.symlink.loc.path, op_ret, op_errno);
- }
- inode_ctx_get (local->stub->args.symlink.loc.inode,
- this, &tmp_stateino);
- stateino = (char *)(long)tmp_stateino;
-
- if (op_ret == 0)
- stateino[i] = 1;
-
- LOCK (&frame->lock);
- cnt = --local->call_count;
- UNLOCK (&frame->lock);
-
- if (cnt == 0) {
- call_stub_t *stub = local->stub;
- GF_FREE (local->state);
- STACK_UNWIND (frame,
- local->op_ret,
- local->op_errno,
- local->stub->args.symlink.loc.inode, &local->buf,
- &local->preparent, &local->postparent);
- call_stub_destroy (stub);
- }
- return 0;
-}
-
-int32_t
-ha_symlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- ha_local_t *local = NULL;
- ha_private_t *pvt = NULL;
- char *stateino = NULL;
- int child_count = 0, i = 0, cnt = 0;
- call_frame_t *prev_frame = NULL;
- xlator_t **children = NULL;
- uint64_t tmp_stateino = 0;
-
- local = frame->local;
- pvt = this->private;
- child_count = pvt->child_count;
- prev_frame = cookie;
- children = pvt->children;
-
- for (i = 0; i < child_count; i++)
- if (prev_frame->this == children[i])
- break;
-
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_ERROR, "(path=%s) (op_ret=%d op_errno=%d)", local->stub->args.symlink.loc.path, op_ret, op_errno);
- }
- inode_ctx_get (local->stub->args.symlink.loc.inode,
- this, &tmp_stateino);
- stateino = (char *)(long)tmp_stateino;
-
- if (op_ret == 0) {
- stateino[i] = 1;
- local->op_ret = 0;
- local->first_success = 1;
- local->buf = *buf;
- local->preparent = *preparent;
- local->postparent = *postparent;
- }
- cnt = --local->call_count;
- for (i = local->active + 1; i < child_count; i++) {
- if (local->state[i])
- break;
- }
-
- if (cnt == 0 || i == child_count) {
- call_stub_t *stub = local->stub;
- GF_FREE (local->state);
- stub = local->stub;
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- local->stub->args.symlink.loc.inode, &local->buf,
- &local->preparent, &local->postparent);
- call_stub_destroy (stub);
- return 0;
- }
-
- local->active = i;
-
- if (local->first_success == 0) {
- STACK_WIND (frame,
- ha_symlink_cbk,
- children[i],
- children[i]->fops->symlink,
- local->stub->args.symlink.linkname,
- &local->stub->args.symlink.loc);
- return 0;
- }
- cnt = local->call_count;
-
- for (; i < child_count; i++) {
- if (local->state[i]) {
- STACK_WIND (frame,
- ha_symlink_lookup_cbk,
- children[i],
- children[i]->fops->lookup,
- &local->stub->args.symlink.loc,
- 0);
- if (--cnt == 0)
- break;
- }
- }
- return 0;
-}
-
-int32_t
-ha_symlink (call_frame_t *frame,
- xlator_t *this,
- const char *linkname,
- loc_t *loc)
-{
- ha_local_t *local = NULL;
- ha_private_t *pvt = NULL;
- int child_count = 0, i = 0;
- char *stateino = NULL;
- int32_t op_errno = EINVAL;
-
- local = frame->local;
- pvt = this->private;
- child_count = pvt->child_count;
-
- frame->local = local = GF_CALLOC (1, sizeof (*local),
- gf_ha_mt_ha_local_t);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- local->stub = fop_symlink_stub (frame, ha_symlink, linkname, loc);
- if (!local->stub) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- local->op_ret = -1;
- local->op_errno = ENOTCONN;
- local->state = GF_CALLOC (1, child_count, gf_ha_mt_char);
- if (!local->state) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- memcpy (local->state, pvt->state, child_count);
- local->active = -1;
-
- stateino = GF_CALLOC (1, child_count, gf_ha_mt_char);
- if (!stateino) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- inode_ctx_put (loc->inode, this, (uint64_t)(long)stateino);
-
- for (i = 0; i < child_count; i++) {
- if (local->state[i]) {
- local->call_count++;
- if (local->active == -1) {
- local->active = i;
- }
- }
- }
-
- STACK_WIND (frame,
- ha_symlink_cbk,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->symlink,
- linkname, loc);
- return 0;
-err:
- local = frame->local;
- frame->local = NULL;
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL, NULL);
- ha_local_wipe (local);
- return 0;
-}
-
- int32_t
-ha_rename_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *buf,
- struct iatt *preoldparent,
- struct iatt *postoldparent,
- struct iatt *prenewparent,
- struct iatt *postnewparent)
-{
- int ret = -1;
-
- ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
-
- if (ret == 0) {
- STACK_UNWIND (frame, op_ret, op_errno, buf, preoldparent,
- postoldparent, prenewparent, postnewparent);
- }
- return 0;
-}
-
-int32_t
-ha_rename (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc)
-{
- ha_local_t *local = NULL;
- int op_errno = 0;
-
- op_errno = ha_alloc_init_inode (frame, oldloc->inode);
- if (op_errno < 0) {
- op_errno = -op_errno;
- goto err;
- }
- local = frame->local;
- local->stub = fop_rename_stub (frame, ha_rename, oldloc, newloc);
- if (!local->stub) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- op_errno = ENOMEM;
- goto err;
- }
-
- STACK_WIND_COOKIE (frame,
- ha_rename_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->rename,
- oldloc, newloc);
- return 0;
-err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
- return 0;
-}
-
-int
-ha_link_lookup_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- dict_t *dict,
- struct iatt *postparent)
-{
- ha_local_t *local = NULL;
- ha_private_t *pvt = NULL;
- char *stateino = NULL;
- int child_count = 0, i = 0, cnt = 0;
- call_frame_t *prev_frame = NULL;
- xlator_t **children = NULL;
- uint64_t tmp_stateino = 0;
-
- local = frame->local;
- pvt = this->private;
- child_count = pvt->child_count;
- prev_frame = cookie;
- children = pvt->children;
-
- for (i = 0; i < child_count; i++)
- if (prev_frame->this == children[i])
- break;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR, "(path=%s) (op_ret=%d op_errno=%d)", local->stub->args.link.newloc.path, op_ret, op_errno);
- }
- inode_ctx_get (local->stub->args.link.newloc.inode,
- this, &tmp_stateino);
- stateino = (char *)(long)tmp_stateino;
-
- if (op_ret == 0)
- stateino[i] = 1;
-
- LOCK (&frame->lock);
- cnt = --local->call_count;
- UNLOCK (&frame->lock);
-
- if (cnt == 0) {
- call_stub_t *stub = local->stub;
- GF_FREE (local->state);
- STACK_UNWIND (frame,
- local->op_ret,
- local->op_errno,
- local->stub->args.link.oldloc.inode, &local->buf,
- &local->preparent, &local->postparent);
- call_stub_destroy (stub);
- }
- return 0;
-}
-
-int32_t
-ha_link_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- ha_local_t *local = NULL;
- ha_private_t *pvt = NULL;
- char *stateino = NULL;
- int child_count = 0, i = 0, cnt = 0;
- call_frame_t *prev_frame = NULL;
- xlator_t **children = NULL;
- uint64_t tmp_stateino = 0;
-
- local = frame->local;
- pvt = this->private;
- child_count = pvt->child_count;
- prev_frame = cookie;
- children = pvt->children;
-
- for (i = 0; i < child_count; i++)
- if (prev_frame->this == children[i])
- break;
-
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_ERROR, "(path=%s) (op_ret=%d op_errno=%d)", local->stub->args.link.newloc.path, op_ret, op_errno);
- }
- inode_ctx_get (local->stub->args.link.newloc.inode,
- this, &tmp_stateino);
- stateino = (char *)(long)tmp_stateino;
-
- if (op_ret == 0) {
- stateino[i] = 1;
- local->op_ret = 0;
- local->first_success = 1;
- local->buf = *buf;
- local->preparent = *preparent;
- local->postparent = *postparent;
- }
- cnt = --local->call_count;
- for (i = local->active + 1; i < child_count; i++) {
- if (local->state[i])
- break;
- }
-
- if (cnt == 0 || i == child_count) {
- call_stub_t *stub = local->stub;
- GF_FREE (local->state);
- stub = local->stub;
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- local->stub->args.link.oldloc.inode, &local->buf,
- &local->preparent, &local->postparent);
- call_stub_destroy (stub);
- return 0;
- }
-
- local->active = i;
-
- if (local->first_success == 0) {
- STACK_WIND (frame,
- ha_link_cbk,
- children[i],
- children[i]->fops->link,
- &local->stub->args.link.oldloc,
- &local->stub->args.link.newloc);
- return 0;
- }
- cnt = local->call_count;
-
- for (; i < child_count; i++) {
- if (local->state[i]) {
- STACK_WIND (frame,
- ha_link_lookup_cbk,
- children[i],
- children[i]->fops->lookup,
- &local->stub->args.link.newloc,
- 0);
- if (--cnt == 0)
- break;
- }
- }
- return 0;
-}
-
-int32_t
-ha_link (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc)
-{
- ha_local_t *local = NULL;
- ha_private_t *pvt = NULL;
- int child_count = 0, i = 0;
- char *stateino = NULL;
- int32_t ret = 0, op_errno = 0;
- uint64_t tmp_stateino = 0;
-
- ret = inode_ctx_get (newloc->inode, this, &tmp_stateino);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "dict_ptr_error()");
- }
- stateino = (char *)(long)tmp_stateino;
-
- if (stateino == NULL) {
- gf_log (this->name, GF_LOG_ERROR,
- "newloc->inode's ctx is NULL, returning EINVAL");
- STACK_UNWIND (frame, -1, EINVAL, oldloc->inode, NULL, NULL,
- NULL);
- return 0;
- }
-
- local = frame->local;
- pvt = this->private;
- child_count = pvt->child_count;
-
- frame->local = local = GF_CALLOC (1, sizeof (*local),
- gf_ha_mt_ha_local_t);
- if (!frame->local) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- op_errno = ENOMEM;
- goto err;
- }
-
- local->stub = fop_link_stub (frame, ha_link, oldloc, newloc);
- if (!local->stub) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- op_errno = ENOMEM;
- goto err;
- }
-
- local->op_ret = -1;
- local->op_errno = ENOTCONN;
- local->state = GF_CALLOC (1, child_count, gf_ha_mt_char);
- if (!local->state) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- op_errno = ENOMEM;
- goto err;
- }
-
- memcpy (local->state, pvt->state, child_count);
- local->active = -1;
-
- for (i = 0; i < child_count; i++) {
- if (local->state[i]) {
- local->call_count++;
- if (local->active == -1)
- local->active = i;
- }
- }
-
- STACK_WIND (frame,
- ha_link_cbk,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->link,
- oldloc,
- newloc);
- return 0;
-err:
- local = frame->local;
- frame->local = NULL;
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL, NULL);
- return 0;
-}
-
-int32_t
-ha_create_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- ha_local_t *local = NULL;
- ha_private_t *pvt = NULL;
- int i, child_count = 0, cnt = 0, ret = 0;
- char *stateino = NULL;
- hafd_t *hafdp = NULL;
- call_frame_t *prev_frame = NULL;
- xlator_t **children = NULL;
- uint64_t tmp_stateino = 0;
- uint64_t tmp_hafdp = 0;
-
- local = frame->local;
- pvt = this->private;
- child_count = pvt->child_count;
- prev_frame = cookie;
- children = pvt->children;
-
- ret = inode_ctx_get (local->stub->args.create.loc.inode,
- this, &tmp_stateino);
- stateino = (char *)(long)tmp_stateino;
-
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "dict_to_ptr() error");
- /* FIXME: handle */
- }
- ret = fd_ctx_get (local->stub->args.create.fd, this, &tmp_hafdp);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "dict_to_ptr() error");
- /* FIXME: handle */
- }
- hafdp = (hafd_t *)(long)tmp_hafdp;
-
- for (i = 0; i < child_count; i++) {
- if (prev_frame->this == children[i])
- break;
- }
-
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_ERROR, "(path=%s) (op_ret=%d op_errno=%d)", local->stub->args.create.loc.path, op_ret, op_errno);
- }
- if (op_ret != -1) {
- stateino[i] = 1;
- hafdp->fdstate[i] = 1;
- if (local->op_ret == -1) {
- local->op_ret = 0;
- local->buf = *buf;
- local->preparent = *preparent;
- local->postparent = *postparent;
- local->first_success = 1;
- }
- local->stub->args.create.flags &= (~O_EXCL);
- }
- LOCK (&frame->lock);
- cnt = --local->call_count;
- UNLOCK (&frame->lock);
-
- for (i = local->active + 1; i < child_count; i++) {
- if (local->state[i])
- break;
- }
-
- if (cnt == 0 || i == child_count) {
- char *state = local->state;
- call_stub_t *stub = local->stub;
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- stub->args.create.fd,
- stub->args.create.loc.inode, &local->buf,
- &local->preparent, &local->postparent);
- GF_FREE (state);
- call_stub_destroy (stub);
- return 0;
- }
- local->active = i;
- cnt = local->call_count;
- for (; i < child_count; i++) {
- if (local->state[i]) {
- STACK_WIND (frame,
- ha_create_cbk,
- children[i],
- children[i]->fops->create,
- &local->stub->args.create.loc,
- local->stub->args.create.flags,
- local->stub->args.create.mode,
- local->stub->args.create.fd);
- if ((local->first_success == 0) || (cnt == 0))
- break;
- }
- }
- return 0;
-}
-
-int32_t
-ha_create (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flags,
- mode_t mode, fd_t *fd)
-{
- ha_local_t *local = NULL;
- ha_private_t *pvt = NULL;
- int i, child_count = 0;
- char *stateino = NULL;
- xlator_t **children = NULL;
- hafd_t *hafdp = NULL;
- int32_t op_errno = EINVAL;
-
- local = frame->local;
- pvt = this->private;
- child_count = pvt->child_count;
- children = pvt->children;
-
- if (local == NULL) {
- frame->local = local = GF_CALLOC (1, sizeof (*local),
- gf_ha_mt_ha_local_t);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- local->stub = fop_create_stub (frame, ha_create, loc, flags, mode, fd);
- if (!local->stub) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- local->state = GF_CALLOC (1, child_count, gf_ha_mt_char);
- if (!local->state) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- local->active = -1;
- local->op_ret = -1;
- local->op_errno = ENOTCONN;
- memcpy (local->state, pvt->state, child_count);
-
- for (i = 0; i < pvt->child_count; i++) {
- if (local->state[i]) {
- local->call_count++;
- if (local->active == -1)
- local->active = i;
- }
- }
- /* FIXME handle active -1 */
- stateino = GF_CALLOC (1, child_count, gf_ha_mt_char);
- if (!stateino) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- hafdp = GF_CALLOC (1, sizeof (*hafdp), gf_ha_mt_hafd_t);
- if (!hafdp) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- hafdp->fdstate = GF_CALLOC (1, child_count, gf_ha_mt_char);
- if (!hafdp->fdstate) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- hafdp->path = gf_strdup(loc->path);
- if (!hafdp->path) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- LOCK_INIT (&hafdp->lock);
- fd_ctx_set (fd, this, (uint64_t)(long)hafdp);
- inode_ctx_put (loc->inode, this, (uint64_t)(long)stateino);
- }
-
- STACK_WIND (frame,
- ha_create_cbk,
- children[local->active],
- children[local->active]->fops->create,
- loc, flags, mode, fd);
- return 0;
-err:
- local = frame->local;
- frame->local = NULL;
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
- ha_local_wipe (local);
-
- if (stateino) {
- GF_FREE (stateino);
- stateino = NULL;
- }
-
- if (hafdp) {
- GF_FREE (hafdp->fdstate);
-
- GF_FREE (hafdp->path);
-
- GF_FREE (hafdp);
- }
-
- return 0;
-}
-
- int32_t
-ha_open_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- ha_local_t *local = NULL;
- ha_private_t *pvt = NULL;
- xlator_t **children = NULL;
- int i = 0, child_count = 0, callcnt = 0, ret = 0;
- call_frame_t *prev_frame = NULL;
- hafd_t *hafdp = NULL;
- uint64_t tmp_hafdp = 0;
-
- local = frame->local;
- pvt = this->private;
- children = pvt->children;
- child_count = pvt->child_count;
- prev_frame = cookie;
-
- ret = fd_ctx_get (local->fd, this, &tmp_hafdp);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "dict_ptr_error()");
- }
- hafdp = (hafd_t *)(long)tmp_hafdp;
-
- for (i = 0; i < child_count; i++)
- if (children[i] == prev_frame->this)
- break;
- LOCK (&frame->lock);
- if (op_ret != -1) {
- hafdp->fdstate[i] = 1;
- local->op_ret = 0;
- }
- if (op_ret == -1 && op_errno != ENOTCONN)
- local->op_errno = op_errno;
- callcnt = --local->call_count;
- UNLOCK (&frame->lock);
-
- if (callcnt == 0) {
- STACK_UNWIND (frame,
- local->op_ret,
- local->op_errno,
- local->fd);
- }
- return 0;
-}
-
-int32_t
-ha_open (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flags, fd_t *fd, int wbflags)
-{
- ha_local_t *local = NULL;
- ha_private_t *pvt = NULL;
- char *stateino = NULL;
- xlator_t **children = NULL;
- int cnt = 0, i, child_count = 0, ret = 0;
- hafd_t *hafdp = NULL;
- uint64_t tmp_stateino = 0;
- int32_t op_errno = ENOMEM;
-
- local = frame->local;
- pvt = this->private;
- children = pvt->children;
- child_count = pvt->child_count;
-
-
- frame->local = local = GF_CALLOC (1, sizeof (*local),
- gf_ha_mt_ha_local_t);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- local->op_ret = -1;
- local->op_errno = ENOTCONN;
- local->fd = fd;
-
- hafdp = GF_CALLOC (1, sizeof (*hafdp), gf_ha_mt_hafd_t);
- if (!hafdp) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- hafdp->fdstate = GF_CALLOC (1, child_count, gf_ha_mt_char);
- if (!hafdp->fdstate) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- hafdp->path = gf_strdup (loc->path);
- if (!hafdp->path) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- hafdp->active = -1;
- if (pvt->pref_subvol == -1) {
- hafdp->active = fd->inode->ino % child_count;
- }
-
- LOCK_INIT (&hafdp->lock);
- fd_ctx_set (fd, this, (uint64_t)(long)hafdp);
- ret = inode_ctx_get (loc->inode, this, &tmp_stateino);
- stateino = (char *)(long)tmp_stateino;
-
- for (i = 0; i < child_count; i++)
- if (stateino[i])
- cnt++;
- local->call_count = cnt;
- for (i = 0; i < child_count; i++) {
- if (stateino[i]) {
- STACK_WIND (frame,
- ha_open_cbk,
- children[i],
- children[i]->fops->open,
- loc, flags, fd, wbflags);
- if (--cnt == 0)
- break;
- }
- }
- return 0;
-err:
- local = frame->local;
- frame->local = NULL;
-
- STACK_UNWIND (frame, -1, op_errno, fd);
- if (hafdp) {
- if (hafdp->fdstate) {
- GF_FREE (hafdp->fdstate);
- hafdp->fdstate = NULL;
- }
-
- if (hafdp->path) {
- GF_FREE (hafdp->path);
- hafdp->path = NULL;
- }
-
- GF_FREE (hafdp);
- }
-
- ha_local_wipe (local);
- return 0;
-}
-
- int32_t
-ha_readv_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iovec *vector,
- int32_t count,
- struct iatt *stbuf,
- struct iobref *iobref)
-{
- int ret = 0;
-
- ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
-
- if (ret == 0) {
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- vector,
- count,
- stbuf,
- iobref);
- }
- return 0;
-}
-
-int32_t
-ha_readv (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset)
-{
- ha_local_t *local = NULL;
- int op_errno = 0;
-
- op_errno = ha_alloc_init_fd (frame, fd);
- if (op_errno < 0) {
- op_errno = -op_errno;
- goto err;
- }
- local = frame->local;
- local->stub = fop_readv_stub (frame, ha_readv, fd, size, offset);
- if (!local->stub) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- STACK_WIND_COOKIE (frame,
- ha_readv_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->readv,
- fd,
- size,
- offset);
- return 0;
-err:
- local = frame->local;
- frame->local = NULL;
-
- STACK_UNWIND (frame, -1, op_errno, NULL, 0, NULL, NULL);
-
- ha_local_wipe (local);
- return 0;
-}
-
- int32_t
-ha_writev_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf)
-{
- int ret = 0;
- ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
-
- if (ret == 0) {
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- prebuf,
- postbuf);
- }
- return 0;
-}
-
-int32_t
-ha_writev (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- struct iovec *vector,
- int32_t count,
- off_t off,
- struct iobref *iobref)
-{
- ha_local_t *local = NULL;
- int op_errno = 0;
-
- op_errno = ha_alloc_init_fd (frame, fd);
- if (op_errno < 0) {
- op_errno = -op_errno;
- goto err;
- }
- local = frame->local;
- local->stub = fop_writev_stub (frame, ha_writev, fd, vector, count, off,
- iobref);
- if (!local->stub) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- STACK_WIND_COOKIE (frame,
- ha_writev_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->writev,
- fd,
- vector,
- count,
- off,
- iobref);
- return 0;
-err:
- local = frame->local;
- frame->local = NULL;
-
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
-
- ha_local_wipe (local);
- return 0;
-}
-
- int32_t
-ha_flush_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int ret = 0;
- ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
-
- if (ret == 0) {
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- }
- return 0;
-}
-
-int32_t
-ha_flush (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd)
-{
- ha_local_t *local = NULL;
- int op_errno = 0;
-
- op_errno = ha_alloc_init_fd (frame, fd);
- if (op_errno < 0) {
- op_errno = -op_errno;
- goto err;
- }
- local = frame->local;
- local->stub = fop_flush_stub (frame, ha_flush, fd);
- if (!local->stub) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- STACK_WIND_COOKIE (frame,
- ha_flush_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->flush,
- fd);
- return 0;
-err:
- local = frame->local;
- frame->local = NULL;
-
- STACK_UNWIND (frame, -1, op_errno);
-
- ha_local_wipe (local);
- return 0;
-}
-
-
- int32_t
-ha_fsync_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf)
-{
- int ret = 0;
- ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
-
- if (ret == 0) {
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- }
- return 0;
-}
-
-int32_t
-ha_fsync (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t flags)
-{
- ha_local_t *local = NULL;
- int op_errno = 0;
-
- op_errno = ha_alloc_init_fd (frame, fd);
- if (op_errno < 0) {
- op_errno = -op_errno;
- goto err;
- }
- local = frame->local;
- local->stub = fop_fsync_stub (frame, ha_fsync, fd, flags);
- if (!local->stub) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- op_errno = ENOMEM;
- goto err;
- }
-
- STACK_WIND_COOKIE (frame,
- ha_fsync_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->fsync,
- fd,
- flags);
- return 0;
-err:
- local = frame->local;
- frame->local = NULL;
-
- STACK_UNWIND (frame, -1, op_errno);
-
- ha_local_wipe (local);
- return 0;
-}
-
- int32_t
-ha_fstat_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *buf)
-{
- int ret = 0;
-
- ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
-
- if (ret == 0) {
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- }
- return 0;
-}
-
-int32_t
-ha_fstat (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd)
-{
- ha_local_t *local = NULL;
- int op_errno = 0;
-
- op_errno = ha_alloc_init_fd (frame, fd);
-
- if (op_errno < 0) {
- op_errno = -op_errno;
- goto err;
- }
- local = frame->local;
- local->stub = fop_fstat_stub (frame, ha_fstat, fd);
- if (!local->stub) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- STACK_WIND_COOKIE (frame,
- ha_fstat_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->fstat,
- fd);
- return 0;
-err:
- local = frame->local;
- frame->local = NULL;
-
- STACK_UNWIND (frame, -1, op_errno, NULL);
-
- ha_local_wipe (local);
- return 0;
-}
-
-int32_t
-ha_opendir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- ha_local_t *local = NULL;
- ha_private_t *pvt = NULL;
- xlator_t **children = NULL;
- int i = 0, child_count = 0, callcnt = 0, ret = 0;
- call_frame_t *prev_frame = NULL;
- hafd_t *hafdp = NULL;
- uint64_t tmp_hafdp = 0;
-
- local = frame->local;
- pvt = this->private;
- children = pvt->children;
- child_count = pvt->child_count;
- prev_frame = cookie;
-
- ret = fd_ctx_get (local->fd, this, &tmp_hafdp);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "dict_ptr_error()");
- }
- hafdp = (hafd_t *)(long)tmp_hafdp;
-
- for (i = 0; i < child_count; i++)
- if (children[i] == prev_frame->this)
- break;
- LOCK (&frame->lock);
- if (op_ret != -1) {
- hafdp->fdstate[i] = 1;
- local->op_ret = 0;
- }
- if (op_ret == -1 && op_errno != ENOTCONN)
- local->op_errno = op_errno;
- callcnt = --local->call_count;
- UNLOCK (&frame->lock);
-
- if (callcnt == 0) {
- STACK_UNWIND (frame,
- local->op_ret,
- local->op_errno,
- local->fd);
- }
- return 0;
-}
-
-int32_t
-ha_opendir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc, fd_t *fd)
-{
- ha_local_t *local = NULL;
- ha_private_t *pvt = NULL;
- char *stateino = NULL;
- xlator_t **children = NULL;
- int cnt = 0, i, child_count = 0, ret = 0;
- hafd_t *hafdp = NULL;
- uint64_t tmp_stateino = 0;
- int32_t op_errno = EINVAL;
-
- local = frame->local;
- pvt = this->private;
- children = pvt->children;
- child_count = pvt->child_count;
-
- frame->local = local = GF_CALLOC (1, sizeof (*local),
- gf_ha_mt_ha_local_t);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- local->op_ret = -1;
- local->op_errno = ENOTCONN;
- local->fd = fd;
-
- hafdp = GF_CALLOC (1, sizeof (*hafdp), gf_ha_mt_hafd_t);
- if (!hafdp) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- hafdp->fdstate = GF_CALLOC (1, child_count, gf_ha_mt_char);
- if (!hafdp->fdstate) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- hafdp->path = gf_strdup (loc->path);
- if (!hafdp->path) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- LOCK_INIT (&hafdp->lock);
- fd_ctx_set (fd, this, (uint64_t)(long)hafdp);
- ret = inode_ctx_get (loc->inode, this, &tmp_stateino);
- stateino = (char *)(long)tmp_stateino;
-
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "inode_ctx_get() error");
- }
- for (i = 0; i < child_count; i++)
- if (stateino[i])
- cnt++;
- local->call_count = cnt;
- for (i = 0; i < child_count; i++) {
- if (stateino[i]) {
- STACK_WIND (frame,
- ha_opendir_cbk,
- children[i],
- children[i]->fops->opendir,
- loc, fd);
- if (--cnt == 0)
- break;
- }
- }
- return 0;
-err:
- local = frame->local;
- frame->local = NULL;
-
- STACK_UNWIND (frame, -1, op_errno, NULL);
- ha_local_wipe (local);
- if (hafdp) {
- if (hafdp->fdstate) {
- GF_FREE (hafdp->fdstate);
- hafdp->fdstate = NULL;
- }
-
- if (hafdp->path) {
- GF_FREE (hafdp->path);
- hafdp->path = NULL;
- }
-
- GF_FREE (hafdp);
- }
- return 0;
-}
-
- int32_t
-ha_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entries,
- int32_t count)
-{
- int ret = 0;
-
- ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
- if (ret == 0) {
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- entries,
- count);
- }
- return 0;
-}
-
-int32_t
-ha_getdents (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset,
- int32_t flag)
-{
- ha_local_t *local = NULL;
- int op_errno = 0;
-
- op_errno = ha_alloc_init_fd (frame, fd);
- if (op_errno < 0) {
- op_errno = -op_errno;
- goto err;
- }
- local = frame->local;
- local->stub = fop_getdents_stub (frame, ha_getdents, fd, size, offset,
- flag);
- if (!local->stub) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- STACK_WIND_COOKIE (frame,
- ha_getdents_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->getdents,
- fd,
- size,
- offset,
- flag);
- return 0;
-err:
- local = frame->local;
- frame->local = NULL;
-
- STACK_UNWIND (frame, -1, op_errno, NULL, 0);
-
- ha_local_wipe (local);
- return 0;
-}
-
- int32_t
-ha_setdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int ret = 0;
-
- ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
-
- if (ret == 0) {
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- }
- return 0;
-}
-
-int32_t
-ha_setdents (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t flags,
- dir_entry_t *entries,
- int32_t count)
-{
- ha_local_t *local = NULL;
- int op_errno = 0;
-
- op_errno = ha_alloc_init_fd (frame, fd);
- if (op_errno < 0) {
- op_errno = -op_errno;
- goto err;
- }
- local = frame->local;
-
- local->stub = fop_setdents_stub (frame, ha_setdents, fd, flags, entries,
- count);
- if (!local->stub) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- STACK_WIND_COOKIE (frame,
- ha_setdents_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->setdents,
- fd,
- flags,
- entries,
- count);
- return 0;
-err:
- local = frame->local;
- frame->local = NULL;
-
- STACK_UNWIND (frame, -1, op_errno);
-
- ha_local_wipe (local);
- return 0;
-}
-
- int32_t
-ha_fsyncdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int ret = 0;
-
- ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
- if (ret == 0) {
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- }
- return 0;
-}
-
-int32_t
-ha_fsyncdir (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t flags)
-{
- ha_local_t *local = NULL;
- int op_errno = 0;
-
- op_errno = ha_alloc_init_fd (frame, fd);
- if (op_errno < 0) {
- op_errno = -op_errno;
- goto err;
- }
- local = frame->local;
- local->stub = fop_fsyncdir_stub (frame, ha_fsyncdir, fd, flags);
- if (!local->stub) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- STACK_WIND_COOKIE (frame,
- ha_fsyncdir_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->fsyncdir,
- fd,
- flags);
- return 0;
-err:
- local = frame->local;
- frame->local = NULL;
-
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
-
- ha_local_wipe (local);
- return 0;
-}
-
-
- int32_t
-ha_statfs_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct statvfs *buf)
-{
- ha_local_t *local = NULL;
- ha_private_t *priv = NULL;
-
- GF_ASSERT (this);
-
- local = frame->local;
- priv = this->private;
- GF_ASSERT (priv);
-
- if (-1 == op_ret) {
- local->active = (local->active + 1) % priv->child_count;
- local->tries--;
- if (!local->tries)
- goto out;
-
- STACK_WIND (frame, ha_statfs_cbk,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->statfs,
- &local->loc);
- return 0;
- }
-
- out:
- loc_wipe (&local->loc);
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
-}
-
-int32_t
-ha_statfs (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
-{
- ha_private_t *priv = NULL;
- ha_local_t *local = NULL;
- int op_errno = 0;
-
- /* The normal way of handling failover doesn't work here
- * as loc->inode may be null in this case.
- */
- local = GF_CALLOC (1, sizeof (*local),
- gf_ha_mt_ha_local_t);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- priv = this->private;
- frame->local = local;
- local->active = priv->pref_subvol;
- if (-1 == local->active)
- local->active = 0;
- local->tries = priv->child_count;
- loc_copy (&local->loc, loc);
-
- STACK_WIND (frame, ha_statfs_cbk, HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->statfs, loc);
- return 0;
-err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
- return 0;
-}
-
- int32_t
-ha_setxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int ret = -1;
-
- ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
-
- if (ret == 0) {
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- }
- return 0;
-}
-
-int32_t
-ha_setxattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *dict,
- int32_t flags)
-{
- ha_local_t *local = NULL;
- int op_errno = 0;
-
- op_errno = ha_alloc_init_inode (frame, loc->inode);
- if (op_errno < 0) {
- op_errno = -op_errno;
- goto err;
- }
- local = frame->local;
- local->stub = fop_setxattr_stub (frame, ha_setxattr, loc, dict, flags);
- if (!local->stub) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- STACK_WIND_COOKIE (frame,
- ha_setxattr_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->setxattr,
- loc,
- dict,
- flags);
- return 0;
-err:
- local = frame->local;
- frame->local = NULL;
-
- STACK_UNWIND (frame, -1, op_errno);
-
- ha_local_wipe (local);
- return 0;
-}
-
- int32_t
-ha_getxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict)
-{
- int ret = -1;
-
- ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
-
- if (ret == 0) {
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- dict);
- }
- return 0;
-}
-
-int32_t
-ha_getxattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name)
-{
- ha_local_t *local = NULL;
- int op_errno = 0;
-
- op_errno = ha_alloc_init_inode (frame, loc->inode);
- if (op_errno < 0) {
- op_errno = -op_errno;
- goto err;
- }
- local = frame->local;
- local->stub = fop_getxattr_stub (frame, ha_getxattr, loc, name);
- if (!local->stub) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- STACK_WIND_COOKIE (frame,
- ha_getxattr_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->getxattr,
- loc,
- name);
- return 0;
-err:
- local = frame->local;
- frame->local = NULL;
-
- STACK_UNWIND (frame, -1, op_errno, NULL);
-
- ha_local_wipe (local);
- return 0;
-}
-
-int32_t
-ha_xattrop_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict)
-{
- int ret = -1;
- ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
- if (ret == 0) {
- STACK_UNWIND (frame, op_ret, op_errno, dict);
- }
- return 0;
-}
-
-
-int32_t
-ha_xattrop (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- gf_xattrop_flags_t flags,
- dict_t *dict)
-{
- ha_local_t *local = NULL;
- int op_errno = 0;
-
- op_errno = ha_alloc_init_inode (frame, loc->inode);
- if (op_errno < 0) {
- op_errno = -op_errno;
- goto err;
- }
- local = frame->local;
-
- local->stub = fop_xattrop_stub (frame, ha_xattrop, loc, flags, dict);
- if (!local->stub) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- STACK_WIND_COOKIE (frame,
- ha_xattrop_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->xattrop,
- loc,
- flags,
- dict);
- return 0;
-err:
- local = frame->local;
- frame->local = NULL;
-
- STACK_UNWIND (frame, -1, op_errno, dict);
-
- ha_local_wipe (local);
- return 0;
-}
-
-int32_t
-ha_fxattrop_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict)
-{
- int ret = -1;
- ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
- if (ret == 0)
- STACK_UNWIND (frame, op_ret, op_errno, dict);
- return 0;
-}
-
-int32_t
-ha_fxattrop (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- gf_xattrop_flags_t flags,
- dict_t *dict)
-{
- ha_local_t *local = NULL;
- int op_errno = 0;
-
- op_errno = ha_alloc_init_fd (frame, fd);
- if (op_errno < 0) {
- op_errno = -op_errno;
- goto err;
- }
- local = frame->local;
- local->stub = fop_fxattrop_stub (frame, ha_fxattrop, fd, flags, dict);
- if (!local->stub) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- STACK_WIND_COOKIE (frame,
- ha_fxattrop_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->fxattrop,
- fd,
- flags,
- dict);
- return 0;
-err:
- local = frame->local;
- frame->local = NULL;
-
- STACK_UNWIND (frame, -1, op_errno, dict);
-
- ha_local_wipe (local);
- return 0;
-}
-
- int32_t
-ha_removexattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int ret = -1;
- ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
- if (ret == 0) {
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- }
- return 0;
-}
-
-int32_t
-ha_removexattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name)
-{
- ha_local_t *local = NULL;
- int op_errno = 0;
-
- op_errno = ha_alloc_init_inode (frame, loc->inode);
- if (op_errno < 0) {
- op_errno = -op_errno;
- goto err;
- }
- local = frame->local;
-
- local->stub = fop_removexattr_stub (frame, ha_removexattr, loc, name);
- if (!local->stub) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- STACK_WIND_COOKIE (frame,
- ha_removexattr_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->removexattr,
- loc,
- name);
- return 0;
-err:
- local = frame->local;
- frame->local = NULL;
-
- STACK_UNWIND (frame, -1, op_errno);
-
- ha_local_wipe (local);
- return 0;
-}
-
-int32_t
-ha_lk_setlk_unlck_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct gf_flock *lock)
-{
- ha_local_t *local = NULL;
- int cnt = 0;
- call_stub_t *stub = NULL;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- cnt = --local->call_count;
- if (op_ret == 0)
- local->op_ret = 0;
- UNLOCK (&frame->lock);
-
- if (cnt == 0) {
- stub = local->stub;
- GF_FREE (local->state);
- if (stub->args.lk.lock.l_type == F_UNLCK) {
- STACK_UNWIND (frame, local->op_ret, local->op_errno, &stub->args.lk.lock);
- } else {
- STACK_UNWIND (frame, -1, EIO, NULL);
- }
- call_stub_destroy (stub);
- }
- return 0;
-}
-
-int32_t
-ha_lk_setlk_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct gf_flock *lock)
-{
- ha_local_t *local = NULL;
- ha_private_t *pvt = NULL;
- xlator_t **children = NULL;
- int i = 0, cnt = 0, j = 0;
- int child_count = 0;
- call_frame_t *prev_frame = NULL;
- char *state = NULL;
-
- local = frame->local;
- pvt = this->private;
- children = pvt->children;
- child_count = pvt->child_count;
- prev_frame = cookie;
- state = local->state;
-
- if (op_ret == 0)
- local->op_ret = 0;
-
- if ((op_ret == 0) || (op_ret == -1 && op_errno == ENOTCONN)) {
- for (i = 0; i < child_count; i++) {
- if (prev_frame->this == cookie)
- break;
- }
- i++;
- for (; i < child_count; i++) {
- if (local->state[i])
- break;
- }
- if (i == child_count) {
- call_stub_t *stub = local->stub;
- GF_FREE (local->state);
- STACK_UNWIND (frame, 0, op_errno, &stub->args.lk.lock);
- call_stub_destroy (stub);
- return 0;
- }
- STACK_WIND (frame,
- ha_lk_setlk_cbk,
- children[i],
- children[i]->fops->lk,
- local->stub->args.lk.fd,
- local->stub->args.lk.cmd,
- &local->stub->args.lk.lock);
- return 0;
- } else {
- for (i = 0; i < child_count; i++) {
- if (prev_frame->this == cookie)
- break;
- }
- cnt = 0;
- for (j = 0; j < i; j++) {
- if (state[i])
- cnt++;
- }
- if (cnt) {
- struct gf_flock lock;
- lock = local->stub->args.lk.lock;
- for (i = 0; i < child_count; i++) {
- if (state[i]) {
- STACK_WIND (frame,
- ha_lk_setlk_unlck_cbk,
- children[i],
- children[i]->fops->lk,
- local->stub->args.lk.fd,
- local->stub->args.lk.cmd,
- &lock);
- if (--cnt == 0)
- break;
- }
- }
- return 0;
- } else {
- GF_FREE (local->state);
- call_stub_destroy (local->stub);
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- lock);
- return 0;
- }
- }
-}
-
-int32_t
-ha_lk_getlk_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct gf_flock *lock)
-{
- ha_local_t *local = NULL;
- ha_private_t *pvt = NULL;
- fd_t *fd = NULL;
- int child_count = 0, i = 0;
- xlator_t **children = NULL;
- call_frame_t *prev_frame = NULL;
-
- local = frame->local;
- pvt = this->private;
- fd = local->stub->args.lk.fd;
- child_count = pvt->child_count;
- children = pvt->children;
- prev_frame = cookie;
-
- if (op_ret == 0) {
- GF_FREE (local->state);
- call_stub_destroy (local->stub);
- STACK_UNWIND (frame, 0, 0, lock);
- return 0;
- }
-
- for (i = 0; i < child_count; i++) {
- if (prev_frame->this == children[i])
- break;
- }
-
- for (; i < child_count; i++) {
- if (local->state[i])
- break;
- }
-
- if (i == child_count) {
- GF_FREE (local->state);
- call_stub_destroy (local->stub);
- STACK_UNWIND (frame, op_ret, op_errno, lock);
- return 0;
- }
-
- STACK_WIND (frame,
- ha_lk_getlk_cbk,
- children[i],
- children[i]->fops->lk,
- fd,
- local->stub->args.lk.cmd,
- &local->stub->args.lk.lock);
- return 0;
-}
-
-int32_t
-ha_lk (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t cmd,
- struct gf_flock *lock)
-{
- ha_local_t *local = NULL;
- ha_private_t *pvt = NULL;
- hafd_t *hafdp = NULL;
- char *state = NULL;
- int child_count = 0, i = 0, cnt = 0, ret = 0;
- xlator_t **children = NULL;
- uint64_t tmp_hafdp = 0;
- int32_t op_errno = EINVAL;
-
- local = frame->local;
- pvt = this->private;
- child_count = pvt->child_count;
- children = pvt->children;
- ret = fd_ctx_get (fd, this, &tmp_hafdp);
- if (ret < 0)
- gf_log (this->name, GF_LOG_ERROR, "fd_ctx_get failed");
-
- if (local == NULL) {
- local = frame->local = GF_CALLOC (1, sizeof (*local),
- gf_ha_mt_ha_local_t);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- local->active = -1;
- local->op_ret = -1;
- local->op_errno = ENOTCONN;
- }
- hafdp = (hafd_t *)(long)tmp_hafdp;
-
- if (local->active == -1) {
- op_errno = ENOTCONN;
- goto err;
- }
-
- local->stub = fop_lk_stub (frame, ha_lk, fd, cmd, lock);
- if (!local->stub) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- local->state = GF_CALLOC (1, child_count, gf_ha_mt_char);
- if (!local->state) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- state = hafdp->fdstate;
- LOCK (&hafdp->lock);
- memcpy (local->state, state, child_count);
- UNLOCK (&hafdp->lock);
- if (cmd == F_GETLK) {
- for (i = 0; i < child_count; i++) {
- if (local->state[i])
- break;
- }
- STACK_WIND (frame,
- ha_lk_getlk_cbk,
- children[i],
- children[i]->fops->lk,
- fd,
- cmd,
- lock);
- } else if (cmd == F_SETLK && lock->l_type == F_UNLCK) {
- for (i = 0; i < child_count; i++) {
- if (local->state[i])
- local->call_count++;
- }
- cnt = local->call_count;
- for (i = 0; i < child_count; i++) {
- if (local->state[i]) {
- STACK_WIND (frame,
- ha_lk_setlk_unlck_cbk,
- children[i],
- children[i]->fops->lk,
- fd, cmd, lock);
- if (--cnt == 0)
- break;
- }
- }
- } else {
- for (i = 0; i < child_count; i++) {
- if (local->state[i])
- break;
- }
- STACK_WIND (frame,
- ha_lk_setlk_cbk,
- children[i],
- children[i]->fops->lk,
- fd,
- cmd,
- lock);
- }
- return 0;
-err:
- local = frame->local;
- frame->local = NULL;
-
- STACK_UNWIND (frame, -1, op_errno, NULL);
-
- ha_local_wipe (local);
- return 0;
-}
-
- int32_t
-ha_inode_entry_lk_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int ret = -1;
-
- ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
- if (ret == 0) {
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- }
- return 0;
-}
-
-int32_t
-ha_inodelk (call_frame_t *frame,
- xlator_t *this,
- const char *volume,
- loc_t *loc,
- int32_t cmd,
- struct gf_flock *lock)
-{
- ha_local_t *local = NULL;
- int op_errno = 0;
-
- op_errno = ha_alloc_init_inode (frame, loc->inode);
- if (op_errno < 0) {
- op_errno = -op_errno;
- goto err;
- }
- local = frame->local;
- local->stub = fop_inodelk_stub (frame, ha_inodelk, volume,
- loc, cmd, lock);
- STACK_WIND_COOKIE (frame,
- ha_inode_entry_lk_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->inodelk,
- volume,
- loc,
- cmd,
- lock);
- return 0;
-err:
- STACK_UNWIND (frame, -1, op_errno);
- return 0;
-}
-
-int32_t
-ha_entrylk (call_frame_t *frame,
- xlator_t *this,
- const char *volume,
- loc_t *loc,
- const char *basename,
- entrylk_cmd cmd,
- entrylk_type type)
-{
- ha_local_t *local = NULL;
- int op_errno = 0;
-
- op_errno = ha_alloc_init_inode (frame, loc->inode);
- if (op_errno < 0) {
- op_errno = -op_errno;
- goto err;
- }
- local = frame->local;
- local->stub = fop_entrylk_stub (frame, ha_entrylk, volume,
- loc, basename, cmd, type);
- STACK_WIND_COOKIE (frame,
- ha_inode_entry_lk_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->entrylk,
- volume, loc, basename, cmd, type);
- return 0;
-err:
- STACK_UNWIND (frame, -1, op_errno);
- return 0;
-}
-
- int32_t
-ha_checksum_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- uint8_t *file_checksum,
- uint8_t *dir_checksum)
-{
- int ret = -1;
-
- ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
- if (ret == 0) {
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- file_checksum,
- dir_checksum);
- }
- return 0;
-}
-
-int32_t
-ha_checksum (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flag)
-{
- int op_errno = 0;
- ha_local_t *local = NULL;
-
- op_errno = ha_alloc_init_inode (frame, loc->inode);
- if (op_errno < 0) {
- op_errno = -op_errno;
- goto err;
- }
- local = frame->local;
- local->stub = fop_checksum_stub (frame, ha_checksum, loc, flag);
- if (!local->stub) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- STACK_WIND_COOKIE (frame,
- ha_checksum_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->checksum,
- loc,
- flag);
- return 0;
-err:
- local = frame->local;
- frame->local = NULL;
-
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
-
- ha_local_wipe (local);
- return 0;
-}
-
-int32_t
-ha_common_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries)
-{
- int ret = 0;
-
- ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
- if (ret == 0)
- STACK_UNWIND (frame, op_ret, op_errno, entries);
- return 0;
-}
-
-int32_t
-ha_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t off);
-
-int32_t
-ha_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t off);
-int32_t
-ha_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t off, int whichop)
-{
- ha_local_t *local = NULL;
- int op_errno = 0;
-
- op_errno = ha_alloc_init_fd (frame, fd);
- if (op_errno < 0) {
- op_errno = -op_errno;
- goto err;
- }
- local = frame->local;
- if (whichop == GF_FOP_READDIR)
- local->stub = fop_readdir_stub (frame, ha_readdir, fd, size,
- off);
- else
- local->stub = fop_readdirp_stub (frame, ha_readdirp, fd, size,
- off);
- if (!local->stub) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- if (whichop == GF_FOP_READDIR)
- STACK_WIND_COOKIE (frame, ha_common_readdir_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->readdir,
- fd, size, off);
- else
- STACK_WIND_COOKIE (frame, ha_common_readdir_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->readdirp,
- fd, size, off);
- return 0;
-err:
- local = frame->local;
- frame->local = NULL;
-
- STACK_UNWIND (frame, -1, ENOTCONN, NULL);
-
- ha_local_wipe (local);
- return 0;
-}
-
-
-int32_t
-ha_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t off)
-{
- ha_do_readdir (frame, this, fd, size, off, GF_FOP_READDIR);
- return 0;
-}
-
-int32_t
-ha_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t off)
-{
- ha_do_readdir (frame, this, fd, size, off, GF_FOP_READDIRP);
- return 0;
-}
-
-/* Management operations */
-
- int32_t
-ha_stats_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct xlator_stats *stats)
-{
- ha_local_t *local = NULL;
- ha_private_t *pvt = NULL;
- call_frame_t *prev_frame = NULL;
- xlator_t **children = NULL;
- int i = 0;
-
- local = frame->local;
- pvt = this->private;
- prev_frame = cookie;
- children = pvt->children;
-
- if (op_ret == -1 && op_errno == ENOTCONN) {
- for (i = 0; i < pvt->child_count; i++) {
- if (prev_frame->this == children[i])
- break;
- }
- i++;
- for (; i < pvt->child_count; i++) {
- if (pvt->state[i])
- break;
- }
-
- if (i == pvt->child_count) {
- STACK_UNWIND (frame, -1, ENOTCONN, NULL);
- return 0;
- }
- STACK_WIND (frame,
- ha_stats_cbk,
- children[i],
- children[i]->mops->stats,
- local->flags);
- return 0;
- }
-
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- stats);
- return 0;
-}
-
-int32_t
-ha_stats (call_frame_t *frame,
- xlator_t *this,
- int32_t flags)
-{
- ha_local_t *local = NULL;
- ha_private_t *pvt = NULL;
- xlator_t **children = NULL;
- int i = 0;
- int32_t op_errno = EINVAL;
-
- local = frame->local = GF_CALLOC (1, sizeof (*local),
- gf_ha_mt_ha_local_t);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- pvt = this->private;
- children = pvt->children;
- for (i = 0; i < pvt->child_count; i++) {
- if (pvt->state[i])
- break;
- }
-
- if (i == pvt->child_count) {
- op_errno = ENOTCONN;
- goto err;
- }
- local->flags = flags;
-
- STACK_WIND (frame,
- ha_stats_cbk,
- children[i],
- children[i]->mops->stats,
- flags);
- return 0;
-
-err:
- local = frame->local;
- frame->local = NULL;
-
- STACK_UNWIND (frame, -1, ENOTCONN, NULL);
-
- ha_local_wipe (local);
- return 0;
-
-}
-
-
-int32_t
-ha_getspec_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- char *spec_data)
-{
- ha_local_t *local = NULL;
- ha_private_t *pvt = NULL;
- call_frame_t *prev_frame = NULL;
- xlator_t **children = NULL;
- int i = 0;
-
- local = frame->local;
- pvt = this->private;
- prev_frame = cookie;
- children = pvt->children;
-
- if (op_ret == -1 && op_errno == ENOTCONN) {
- for (i = 0; i < pvt->child_count; i++) {
- if (prev_frame->this == children[i])
- break;
- }
- i++;
- for (; i < pvt->child_count; i++) {
- if (pvt->state[i])
- break;
- }
-
- if (i == pvt->child_count) {
- STACK_UNWIND (frame, -1, ENOTCONN, NULL);
- return 0;
- }
- STACK_WIND (frame,
- ha_getspec_cbk,
- children[i],
- children[i]->mops->getspec,
- local->pattern,
- local->flags);
- return 0;
- }
-
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- spec_data);
- return 0;
-}
-
-int32_t
-ha_getspec (call_frame_t *frame,
- xlator_t *this,
- const char *key,
- int32_t flags)
-{
- ha_local_t *local = NULL;
- ha_private_t *pvt = NULL;
- xlator_t **children = NULL;
- int i = 0;
- int32_t op_errno = EINVAL;
-
- local = frame->local = GF_CALLOC (1, sizeof (*local),
- gf_ha_mt_ha_local_t);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- pvt = this->private;
- children = pvt->children;
-
- for (i = 0; i < pvt->child_count; i++) {
- if (pvt->state[i])
- break;
- }
-
- if (i == pvt->child_count) {
- op_errno = ENOTCONN;
- goto err;
- }
- local->flags = flags;
- local->pattern = (char *)key;
-
- STACK_WIND (frame,
- ha_getspec_cbk,
- children[i],
- children[i]->mops->getspec,
- key, flags);
- return 0;
-err:
- local = frame->local;
- frame->local = NULL;
-
- STACK_UNWIND (frame, -1, ENOTCONN, NULL);
-
- ha_local_wipe (local);
- return 0;
-
-}
-
-int32_t
-ha_closedir (xlator_t *this,
- fd_t *fd)
-{
- hafd_t *hafdp = NULL;
- int op_errno = 0;
- uint64_t tmp_hafdp = 0;
-
- op_errno = fd_ctx_del (fd, this, &tmp_hafdp);
- if (op_errno != 0) {
- gf_log (this->name, GF_LOG_ERROR, "fd_ctx_del() error");
- return 0;
- }
- hafdp = (hafd_t *)(long)tmp_hafdp;
-
- GF_FREE (hafdp->fdstate);
- GF_FREE (hafdp->path);
- LOCK_DESTROY (&hafdp->lock);
- return 0;
-}
-
-int32_t
-ha_close (xlator_t *this,
- fd_t *fd)
-{
- hafd_t *hafdp = NULL;
- int op_errno = 0;
- uint64_t tmp_hafdp = 0;
-
- op_errno = fd_ctx_del (fd, this, &tmp_hafdp);
- if (op_errno != 0) {
- gf_log (this->name, GF_LOG_ERROR, "fd_ctx_del() error");
- return 0;
- }
- hafdp = (hafd_t *)(long)tmp_hafdp;
-
- GF_FREE (hafdp->fdstate);
- GF_FREE (hafdp->path);
- LOCK_DESTROY (&hafdp->lock);
- return 0;
-}
-
-/* notify */
-int32_t
-notify (xlator_t *this,
- int32_t event,
- void *data,
- ...)
-{
- ha_private_t *pvt = NULL;
- int32_t i = 0, upcnt = 0;
-
- pvt = this->private;
- if (pvt == NULL) {
- gf_log (this->name, GF_LOG_DEBUG, "got notify before init()");
- return 0;
- }
-
- switch (event)
- {
- case GF_EVENT_CHILD_DOWN:
- {
- for (i = 0; i < pvt->child_count; i++) {
- if (data == pvt->children[i])
- break;
- }
- gf_log (this->name, GF_LOG_DEBUG, "GF_EVENT_CHILD_DOWN from %s", pvt->children[i]->name);
- pvt->state[i] = 0;
- for (i = 0; i < pvt->child_count; i++) {
- if (pvt->state[i])
- break;
- }
- if (i == pvt->child_count) {
- default_notify (this, event, data);
- }
- }
- break;
- case GF_EVENT_CHILD_UP:
- {
- for (i = 0; i < pvt->child_count; i++) {
- if (data == pvt->children[i])
- break;
- }
-
- gf_log (this->name, GF_LOG_DEBUG, "GF_EVENT_CHILD_UP from %s", pvt->children[i]->name);
-
- pvt->state[i] = 1;
-
- for (i = 0; i < pvt->child_count; i++) {
- if (pvt->state[i])
- upcnt++;
- }
-
- if (upcnt == 1) {
- default_notify (this, event, data);
- }
- }
- break;
-
- default:
- {
- default_notify (this, event, data);
- }
- }
-
- return 0;
-}
-
-int32_t
-mem_acct_init (xlator_t *this)
-{
- int ret = -1;
-
- if (!this)
- return ret;
-
- ret = xlator_mem_acct_init (this, gf_ha_mt_end + 1);
-
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
- "failed");
- return ret;
- }
-
- return ret;
-}
-
-int
-init (xlator_t *this)
-{
- ha_private_t *pvt = NULL;
- xlator_list_t *trav = NULL;
- int count = 0, ret = 0;
-
-
- if (!this->children) {
- gf_log (this->name,GF_LOG_ERROR,
- "FATAL: ha should have one or more child defined");
- return -1;
- }
-
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile ");
- }
-
- trav = this->children;
- pvt = GF_CALLOC (1, sizeof (ha_private_t), gf_ha_mt_ha_private_t);
-
- ret = dict_get_int32 (this->options, "preferred-subvolume",
- &pvt->pref_subvol);
- if (ret < 0) {
- pvt->pref_subvol = -1;
- }
-
- trav = this->children;
- while (trav) {
- count++;
- trav = trav->next;
- }
-
- pvt->child_count = count;
- pvt->children = GF_CALLOC (count, sizeof (xlator_t*),
- gf_ha_mt_xlator_t);
-
- trav = this->children;
- count = 0;
- while (trav) {
- pvt->children[count] = trav->xlator;
- count++;
- trav = trav->next;
- }
-
- pvt->state = GF_CALLOC (1, count, gf_ha_mt_char);
- this->private = pvt;
- return 0;
-}
-
-void
-fini (xlator_t *this)
-{
- ha_private_t *priv = NULL;
- priv = this->private;
- GF_FREE (priv);
- return;
-}
-
-
-struct xlator_fops fops = {
- .lookup = ha_lookup,
- .stat = ha_stat,
- .readlink = ha_readlink,
- .mknod = ha_mknod,
- .mkdir = ha_mkdir,
- .unlink = ha_unlink,
- .rmdir = ha_rmdir,
- .symlink = ha_symlink,
- .rename = ha_rename,
- .link = ha_link,
- .truncate = ha_truncate,
- .create = ha_create,
- .open = ha_open,
- .readv = ha_readv,
- .writev = ha_writev,
- .statfs = ha_statfs,
- .flush = ha_flush,
- .fsync = ha_fsync,
- .setxattr = ha_setxattr,
- .getxattr = ha_getxattr,
- .removexattr = ha_removexattr,
- .opendir = ha_opendir,
- .readdir = ha_readdir,
- .readdirp = ha_readdirp,
- .getdents = ha_getdents,
- .fsyncdir = ha_fsyncdir,
- .access = ha_access,
- .ftruncate = ha_ftruncate,
- .fstat = ha_fstat,
- .lk = ha_lk,
- .setdents = ha_setdents,
- .lookup_cbk = ha_lookup_cbk,
- .checksum = ha_checksum,
- .xattrop = ha_xattrop,
- .fxattrop = ha_fxattrop,
- .setattr = ha_setattr,
- .fsetattr = ha_fsetattr,
-};
-
-struct xlator_cbks cbks = {
- .release = ha_close,
- .releasedir = ha_closedir,
- .forget = ha_forget,
-};
diff --git a/xlators/cluster/ha/src/ha.h b/xlators/cluster/ha/src/ha.h
deleted file mode 100644
index e2ed7eaa68a..00000000000
--- a/xlators/cluster/ha/src/ha.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-#ifndef __HA_H_
-#define __HA_H_
-
-#include "ha-mem-types.h"
-
-typedef struct {
- call_stub_t *stub;
- int32_t op_ret, op_errno;
- int32_t active, tries, revalidate, revalidate_error;
- int32_t call_count;
- char *state, *pattern;
- dict_t *dict;
- loc_t loc;
- struct iatt buf;
- struct iatt postparent;
- struct iatt preparent;
- fd_t *fd;
- inode_t *inode;
- int32_t flags;
- int32_t first_success;
-} ha_local_t;
-
-typedef struct {
- char *state;
- xlator_t **children;
- int child_count, pref_subvol;
-} ha_private_t;
-
-typedef struct {
- char *fdstate;
- char *path;
- gf_lock_t lock;
- int active;
-} hafd_t;
-
-#define HA_ACTIVE_CHILD(this, local) (((ha_private_t *)this->private)->children[local->active])
-
-extern int ha_alloc_init_fd (call_frame_t *frame, fd_t *fd);
-
-extern int ha_handle_cbk (call_frame_t *frame, void *cookie, int op_ret, int op_errno) ;
-
-extern int ha_alloc_init_inode (call_frame_t *frame, inode_t *inode);
-
-#endif
diff --git a/xlators/cluster/map/src/Makefile.am b/xlators/cluster/map/src/Makefile.am
deleted file mode 100644
index a278b05e2d1..00000000000
--- a/xlators/cluster/map/src/Makefile.am
+++ /dev/null
@@ -1,16 +0,0 @@
-xlator_LTLIBRARIES = map.la
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/testing/cluster
-
-map_la_LDFLAGS = -module -avoid-version
-
-map_la_SOURCES = map.c map-helper.c
-map_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-
-noinst_HEADERS = map.h
-
-AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
-
-AM_CFLAGS = -Wall $(GF_CFLAGS)
-
-CLEANFILES =
-
diff --git a/xlators/cluster/map/src/map-helper.c b/xlators/cluster/map/src/map-helper.c
deleted file mode 100644
index 851397b68d3..00000000000
--- a/xlators/cluster/map/src/map-helper.c
+++ /dev/null
@@ -1,348 +0,0 @@
-/*
- Copyright (c) 2009-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "xlator.h"
-#include "map.h"
-
-
-xlator_t *
-map_subvol_next (xlator_t *this, xlator_t *prev)
-{
- map_private_t *priv = NULL;
- xlator_t *next = NULL;
- int i = 0;
-
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++) {
- if (priv->xlarray[i].xl == prev) {
- if ((i + 1) < priv->child_count)
- next = priv->xlarray[i + 1].xl;
- break;
- }
- }
-
- return next;
-}
-
-int
-map_subvol_cnt (xlator_t *this, xlator_t *subvol)
-{
- int i = 0;
- int ret = -1;
- map_private_t *priv = NULL;
-
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++) {
- if (subvol == priv->xlarray[i].xl) {
- ret = i;
- break;
- }
- }
-
- return ret;
-}
-
-int
-map_itransform (xlator_t *this, xlator_t *subvol, uint64_t x, uint64_t *y_p)
-{
- map_private_t *priv = NULL;
- int cnt = 0;
- int max = 0;
- uint64_t y = 0;
-
- if (x == ((uint64_t) -1)) {
- y = (uint64_t) -1;
- goto out;
- }
-
- priv = this->private;
-
- max = priv->child_count;
- cnt = map_subvol_cnt (this, subvol);
-
- y = ((x * max) + cnt);
-
-out:
- if (y_p)
- *y_p = y;
-
- return 0;
-}
-
-
-int
-map_deitransform (xlator_t *this, uint64_t y, xlator_t **subvol_p,
- uint64_t *x_p)
-{
- int cnt = 0;
- int max = 0;
- uint64_t x = 0;
- xlator_t *subvol = 0;
- map_private_t *priv = NULL;
-
- priv = this->private;
- max = priv->child_count;
-
- cnt = y % max;
- x = y / max;
-
- subvol = priv->xlarray[cnt].xl;
-
- if (subvol_p)
- *subvol_p = subvol;
-
- if (x_p)
- *x_p = x;
-
- return 0;
-}
-
-
-xlator_t *
-get_mapping_subvol_from_path (xlator_t *this, const char *path)
-{
- map_private_t *priv = NULL;
- struct map_pattern *map = NULL;
-
- /* To make sure we handle '/' properly */
- if (!strcmp (path, "/"))
- return NULL;
-
- priv = this->private;
-
- map = priv->map;
- while (map) {
- if (!strncmp (map->directory, path, map->dir_len)) {
- if ((path[map->dir_len] == '/') ||
- (path[map->dir_len] == '\0')) {
- return map->xl;
- }
- }
-
- map = map->next;
- }
-
- return priv->default_xl;
-}
-
-xlator_t *
-get_mapping_subvol_from_ctx (xlator_t *this, inode_t *inode)
-{
- uint64_t subvol = 0;
- int ret = -1;
-
- ret = inode_ctx_get (inode, this, &subvol);
- if (ret != 0)
- return NULL;
-
- return (xlator_t *)(long)subvol;
-}
-
-int
-check_multiple_volume_entry (xlator_t *this,
- xlator_t *subvol)
-{
- int ret = -1;
- int idx = 0;
- map_private_t *priv = NULL;
-
- priv = this->private;
-
- for (idx = 0; idx < priv->child_count; idx++) {
- if (priv->xlarray[idx].xl == subvol) {
- if (priv->xlarray[idx].mapped) {
- gf_log (this->name, GF_LOG_ERROR,
- "subvolume '%s' is already mapped",
- subvol->name);
- goto out;
- }
- priv->xlarray[idx].mapped = 1;
- ret = 0;
- goto out;
- }
- }
-
- gf_log (this->name, GF_LOG_ERROR,
- "subvolume '%s' is not found",
- subvol->name);
-
- out:
- return ret;
-}
-
-int
-verify_dir_and_assign_subvol (xlator_t *this,
- const char *directory,
- const char *subvol)
-{
- int default_flag = 0;
- int ret = -1;
- int idx = 0;
- map_private_t *priv = NULL;
- xlator_list_t *trav = NULL;
- struct map_pattern *tmp_map = NULL;
-
- priv = this->private;
-
- /* check if directory is valid, ie, its a top level dir, and
- * not includes a '*' in it.
- */
- if (!strcmp ("*", directory)) {
- default_flag = 1;
- } else {
- if (directory[0] != '/') {
- gf_log (this->name, GF_LOG_ERROR,
- "map takes absolute path, starting with '/'. "
- "not '%s'", directory);
- goto out;
- }
- for (idx = 1; idx < (strlen (directory) - 1); idx++) {
- if (directory[idx] == '/') {
- gf_log (this->name, GF_LOG_ERROR,
- "map takes only top level directory, "
- "not '%s'", directory);
- goto out;
- }
- }
- }
-
- /* Assign proper subvolume */
- trav = this->children;
- while (trav) {
- if (!strcmp (trav->xlator->name, subvol)) {
-
- /* Check if there is another directory for
- * same volume, if yes, return error.
- */
- ret = check_multiple_volume_entry (this,
- trav->xlator);
- if (ret != 0) {
- goto out;
- }
-
- ret = 0;
- if (default_flag) {
- if (priv->default_xl) {
- ret = -1;
- gf_log (this->name, GF_LOG_ERROR,
- "'*' specified more than "
- "once. don't confuse me!!!");
- }
-
- priv->default_xl = trav->xlator;
- goto out;
- }
-
- tmp_map = GF_CALLOC (1, sizeof (struct map_pattern),
- gf_map_mt_map_pattern);
- tmp_map->xl = trav->xlator;
- tmp_map->dir_len = strlen (directory);
-
- /* make sure that the top level directory starts
- * with '/' and ends without '/'
- */
- tmp_map->directory = gf_strdup (directory);
- if (directory[tmp_map->dir_len - 1] == '/') {
- tmp_map->dir_len--;
- }
-
- if (!priv->map)
- priv->map = tmp_map;
- else {
- struct map_pattern *trav_map = NULL;
- trav_map = priv->map;
- while (trav_map->next)
- trav_map = trav_map->next;
- trav_map->next = tmp_map;
- }
-
- goto out;
- }
-
- trav = trav->next;
- }
-
- gf_log (this->name, GF_LOG_ERROR,
- "map volume '%s' is not proper subvolume", subvol);
-
- out:
- return ret;
-}
-
-int
-assign_default_subvol (xlator_t *this, const char *default_xl)
-{
- int ret = -1;
- map_private_t *priv = NULL;
- xlator_list_t *trav = NULL;
-
- priv = this->private;
- trav = this->children;
-
- while (trav) {
- if (!strcmp (trav->xlator->name, default_xl)) {
- ret = check_multiple_volume_entry (this,
- trav->xlator);
- if (ret != 0) {
- goto out;
- }
- if (priv->default_xl)
- gf_log (this->name, GF_LOG_WARNING,
- "default-volume option provided, "
- "overriding earlier '*' option");
- priv->default_xl = trav->xlator;
- return 0;
- }
- trav = trav->next;
- }
-
- gf_log (this->name, GF_LOG_ERROR,
- "default-volume value is not an valid subvolume. check again");
- out:
- return -1;
-}
-
-void
-verify_if_all_subvolumes_got_used (xlator_t *this)
-{
- int idx = 0;
- map_private_t *priv = NULL;
-
- priv = this->private;
-
- for (idx = 0; idx < priv->child_count; idx++) {
- if (!priv->xlarray[idx].mapped) {
- if (!priv->default_xl) {
- priv->default_xl = priv->xlarray[idx].xl;
- priv->xlarray[idx].mapped = 1;
- } else {
- gf_log (this->name, GF_LOG_WARNING,
- "subvolume '%s' is not mapped to "
- "any directory",
- priv->xlarray[idx].xl->name);
- }
- }
- }
-
- if (!priv->default_xl) {
- gf_log (this->name, GF_LOG_WARNING,
- "default subvolume not specified, filesystem "
- "may not work properly. Check 'map' translator "
- "documentation for more info");
- }
-
- return ;
-}
diff --git a/xlators/cluster/map/src/map-mem-types.h b/xlators/cluster/map/src/map-mem-types.h
deleted file mode 100644
index 3e89f4736e4..00000000000
--- a/xlators/cluster/map/src/map-mem-types.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef __MAP_MEM_TYPES_H__
-#define __MAP_MEM_TYPES_H__
-
-#include "mem-types.h"
-
-enum gf_map_mem_types_ {
- gf_map_mt_map_private_t = gf_common_mt_end + 1,
- gf_map_mt_map_local_t,
- gf_map_mt_map_xlator_array,
- gf_map_mt_map_pattern,
- gf_map_mt_end
-};
-#endif
-
diff --git a/xlators/cluster/map/src/map.c b/xlators/cluster/map/src/map.c
deleted file mode 100644
index aa2b23c29f1..00000000000
--- a/xlators/cluster/map/src/map.c
+++ /dev/null
@@ -1,2566 +0,0 @@
-/*
- Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "xlator.h"
-#include "map.h"
-
-/* TODO :
- * -> support for 'get' 'put' API in through xattrs.
- * -> define the behavior of notify()
- */
-
-static int32_t
-map_stat_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *buf)
-
-{
- call_frame_t *prev = NULL;
- prev = cookie;
-
- map_itransform (this, prev->this, buf->ia_ino, &buf->ia_ino);
-
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
-}
-
-static int32_t
-map_setattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *statpre,
- struct iatt *statpost)
-{
- call_frame_t *prev = NULL;
- prev = cookie;
-
- map_itransform (this, prev->this, statpre->ia_ino, &statpre->ia_ino);
- map_itransform (this, prev->this, statpost->ia_ino, &statpost->ia_ino);
-
- STACK_UNWIND (frame, op_ret, op_errno, statpre, statpost);
- return 0;
-}
-
-static int32_t
-map_fsetattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *statpre,
- struct iatt *statpost)
-{
- call_frame_t *prev = NULL;